diff --git a/404.html b/404.html index dd0f7a5..6899e38 100644 --- a/404.html +++ b/404.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

404: File not found

The requested file was not found.

Please click here to go to the home page, or have a look at the website modules below.

Modules

\ No newline at end of file + Dataflowr - Deep Learning DIY

404: File not found

The requested file was not found.

Please click here to go to the home page, or have a look at the website modules below.

Modules

\ No newline at end of file diff --git a/homework/1-mlp-from-scratch/index.html b/homework/1-mlp-from-scratch/index.html index e9405c7..53545c8 100644 --- a/homework/1-mlp-from-scratch/index.html +++ b/homework/1-mlp-from-scratch/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Homework 1: MLP from scratch

Homework 1 is in the form of a Jupyter notebook. You must complete it and submit it on Moodle (for students enrolled in this course).

The Jupyter notebook

This homework will run fine on a regular CPU (no need for a GPU). If you want to run it locally (on your laptop), you can follow the procedure described in Module 0. Note that if you cloned the GitHub repository, the homework will be in the folder /notebooks/HW1.

\ No newline at end of file + Dataflowr - Deep Learning DIY

Homework 1: MLP from scratch

Homework 1 is in the form of a Jupyter notebook. You must complete it and submit it on Moodle (for students enrolled in this course).

The Jupyter notebook

This homework will run fine on a regular CPU (no need for a GPU). If you want to run it locally (on your laptop), you can follow the procedure described in Module 0. Note that if you cloned the GitHub repository, the homework will be in the folder /notebooks/HW1.

\ No newline at end of file diff --git a/homework/2-CAM-adversarial/index.html b/homework/2-CAM-adversarial/index.html index 8415c9d..34a248a 100644 --- a/homework/2-CAM-adversarial/index.html +++ b/homework/2-CAM-adversarial/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Homework 2: Class Activation Map and adversarial examples

Can you see the cat below? No? Have a look at the code ;-)

Homework 2 is in the form of a Jupyter notebook. You must complete it and submit it on Moodle (for students enrolled in this course).

The Jupyter notebook

This homework will run fine on a regular CPU (no need for a GPU). If you want to run it locally (on your laptop), you can follow the procedure described in Module 0. Note that if you cloned the GitHub repository, the homework will be in the folder /notebooks/HW2.

\ No newline at end of file + Dataflowr - Deep Learning DIY

Homework 2: Class Activation Map and adversarial examples

Can you see the cat below? No? Have a look at the code ;-)

Homework 2 is in the form of a Jupyter notebook. You must complete it and submit it on Moodle (for students enrolled in this course).

The Jupyter notebook

This homework will run fine on a regular CPU (no need for a GPU). If you want to run it locally (on your laptop), you can follow the procedure described in Module 0. Note that if you cloned the GitHub repository, the homework will be in the folder /notebooks/HW2.

\ No newline at end of file diff --git a/homework/3-VAE/index.html b/homework/3-VAE/index.html index 8493381..61d3289 100644 --- a/homework/3-VAE/index.html +++ b/homework/3-VAE/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Homework 3: VAE for MNIST clustering and generation

Image source

Homework 3 is in the form of a Jupyter notebook. You must complete it and submit it on Moodle (for students enrolled in this course).

The Jupyter notebook

\ No newline at end of file + Dataflowr - Deep Learning DIY

Homework 3: VAE for MNIST clustering and generation

Image source

Homework 3 is in the form of a Jupyter notebook. You must complete it and submit it on Moodle (for students enrolled in this course).

The Jupyter notebook

\ No newline at end of file diff --git a/index.html b/index.html index 90fdb7d..a8f10a9 100644 --- a/index.html +++ b/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Deep Learning Do It Yourself!

This site collects resources to learn Deep Learning in the form of Modules available through the sidebar on the left. As a student, you can walk through the modules at your own pace and interact with others thanks to the associated Discord server. You don’t need any special hardware or software.

Practical deep learning course

The main goal of the course is to allow students to understand papers, blog posts and code available online, and to adapt them to their own projects as soon as possible. In particular, we avoid the use of any high-level neural network API and focus on the PyTorch library in Python.

The course is divided into sessions (possibly containing several modules), and each session requires a significant amount of coding. By the end of the course, students were able to read very recent papers and reproduce (or even improve on) their experiments.

All the code used in this course is available on the GitHub repository dataflowr/notebooks. You will find the solutions to the practicals on this repo! You can fork the repo if you want to run the code locally (see the GitHub Docs about forks), then follow the steps in Module 0. Most of the code will not require a GPU.

⚠ When a GPU is required, you can launch the code on colab by following the corresponding link given in the module (see for example Module 1).

Pre-requisites:

🌻 Session 1 - Finetuning VGG

Start right away and train a deep neural network on a GPU with Module 1 - Introduction & General Overview

Be sure to build your own classifier with more dogs and cats in the practicals.

Things to remember

  • you do not need to understand everything to run a deep learning model! But the main goal of this course is to come back to each of the steps done today and understand them...

  • to use the dataloader from PyTorch, you need to follow its API (e.g. for classification, store your dataset in one folder per class)

  • using a pretrained model and modifying it to adapt it to a similar task is easy.

  • if you do not understand why we take this loss, that's fine, we'll cover that in Module 3.

  • even with a GPU, avoid unnecessary computations!

🌻 Session 2 - PyTorch tensors and Autodiff

Things to remember
  • PyTorch tensors = NumPy on GPU + gradients!

  • in deep learning, broadcasting is used everywhere. The rules are the same as for NumPy.

  • Automatic differentiation is not only the chain rule! The backpropagation algorithm (or dual numbers) is a clever way to implement automatic differentiation...

🌻 Session 3

Things to remember
  • Loss vs Accuracy. Know your loss for a classification task!

  • know your optimizer (Module 4)

  • know how to build a neural net with torch.nn.Module (Module 5)

  • know how to use convolution and pooling layers (kernel, stride, padding)

  • know how to use dropout

🌻 Session 4

Things to remember
  • know how to use dataloader

  • to deal with categorical variables in deep learning, use embeddings

  • in the case of word embeddings, starting from an unsupervised setting, we built a supervised task (i.e. predicting central/context words in a window) and learned the representation thanks to negative sampling

  • know your batchnorm

  • architectures with skip connections allow deeper models

🌻 Session 5

🌻 Session 6

🌻 Session 7

🌻 Session 8

🌻 Session 9

For more updates, follow us on Twitter and check the GitHub repository: dataflowr/notebooks

Curators

Marc Lelarge, Andrei Bursuc with Jill-Jênn Vie

Course in a hurry

Super fast track to learn the basics of deep learning from scratch:

For contributors

Join the GitHub repo dataflowr and make a pull request. What are pull requests?

Thanks to Daniel Huynh, Eric Daoud, Simon Coste

Materials from this site are used for courses at ENS and X.

\ No newline at end of file + Dataflowr - Deep Learning DIY

Deep Learning Do It Yourself!

This site collects resources to learn Deep Learning in the form of Modules available through the sidebar on the left. As a student, you can walk through the modules at your own pace and interact with others thanks to the associated Discord server. You don’t need any special hardware or software.

Practical deep learning course

The main goal of the course is to allow students to understand papers, blog posts and code available online, and to adapt them to their own projects as soon as possible. In particular, we avoid the use of any high-level neural network API and focus on the PyTorch library in Python.

The course is divided into sessions (possibly containing several modules), and each session requires a significant amount of coding. By the end of the course, students were able to read very recent papers and reproduce (or even improve on) their experiments.

All the code used in this course is available on the GitHub repository dataflowr/notebooks. You will find the solutions to the practicals on this repo! You can fork the repo if you want to run the code locally (see the GitHub Docs about forks), then follow the steps in Module 0. Most of the code will not require a GPU.

⚠ When a GPU is required, you can launch the code on colab by following the corresponding link given in the module (see for example Module 1).

Pre-requisites:

🌻 Session 1 - Finetuning VGG

Start right away and train a deep neural network on a GPU with Module 1 - Introduction & General Overview

Be sure to build your own classifier with more dogs and cats in the practicals.

Things to remember

  • you do not need to understand everything to run a deep learning model! But the main goal of this course is to come back to each of the steps done today and understand them...

  • to use the dataloader from PyTorch, you need to follow its API (e.g. for classification, store your dataset in one folder per class; see the sketch after this list)

  • using a pretrained model and modifying it to adapt it to a similar task is easy.

  • if you do not understand why we take this loss, that's fine, we'll cover that in Module 3.

  • even with a GPU, avoid unnecessary computations!
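
To make the dataloader point concrete, here is a minimal sketch (folder names and image size are illustrative) of how torchvision's ImageFolder and a PyTorch DataLoader consume a classification dataset stored with one sub-folder per class:

import torch
from torchvision import datasets, transforms

# expected layout: data/train/cat/xxx.jpg, data/train/dog/yyy.jpg, ... (one sub-folder per class)
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
train_set = datasets.ImageFolder("data/train", transform=transform)   # path is illustrative
train_loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True)

for images, labels in train_loader:   # labels are inferred from the folder names
    pass  # training step goes here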

🌻 Session 2 - PyTorch tensors and Autodiff

Things to remember
  • PyTorch tensors = NumPy on GPU + gradients!

  • in deep learning, broadcasting is used everywhere. The rules are the same as for NumPy (see the example after this list).

  • Automatic differentiation is not only the chain rule! The backpropagation algorithm (or dual numbers) is a clever way to implement automatic differentiation...
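
A small illustration of the broadcasting rules mentioned above (shapes are arbitrary); the same code works with NumPy arrays:

import torch

x = torch.randn(5, 3)      # a batch of 5 vectors of dimension 3
mu = x.mean(dim=0)         # shape (3,)
centered = x - mu          # mu is broadcast to shape (5, 3)

col = torch.tensor([[1.0], [2.0], [3.0]])   # shape (3, 1)
row = torch.tensor([10.0, 20.0, 30.0])      # shape (3,)
table = col * row                           # shapes (3, 1) and (3,) broadcast to (3, 3)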

🌻 Session 3

Things to remember
  • Loss vs Accuracy. Know your loss for a classification task!

  • know your optimizer (Module 4)

  • know how to build a neural net with torch.nn.Module (Module 5)

  • know how to use convolution and pooling layers (kernel, stride, padding); see the sketch after this list

  • know how to use dropout
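
A minimal illustration of the kernel/stride/padding vocabulary (sizes are arbitrary):

import torch
import torch.nn as nn

x = torch.randn(1, 3, 32, 32)   # (batch, channels, height, width), e.g. one CIFAR-like image
conv = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
pool = nn.MaxPool2d(kernel_size=2, stride=2)
print(conv(x).shape)            # torch.Size([1, 16, 32, 32]): padding=1 preserves the spatial size
print(pool(conv(x)).shape)      # torch.Size([1, 16, 16, 16]): pooling halves height and width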

🌻 Session 4

Things to remember
  • know how to use dataloader

  • to deal with categorical variables in deep learning, use embeddings (see the sketch after this list)

  • in the case of word embeddings, starting from an unsupervised setting, we built a supervised task (i.e. predicting central/context words in a window) and learned the representation thanks to negative sampling

  • know your batchnorm

  • architectures with skip connections allow deeper models
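
A minimal sketch of an embedding layer for categorical variables (the sizes are arbitrary):

import torch
import torch.nn as nn

# 10 possible categories, each mapped to a learned 4-dimensional vector
embedding = nn.Embedding(num_embeddings=10, embedding_dim=4)
categories = torch.tensor([0, 3, 3, 7])   # a batch of category indices
vectors = embedding(categories)           # shape (4, 4): one 4-dim vector per index, trained like any other layer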

🌻 Session 5

🌻 Session 6

🌻 Session 7

🌻 Session 8

🌻 Session 9

For more updates, follow us on Twitter and check the GitHub repository: dataflowr/notebooks

Curators

Marc Lelarge, Andrei Bursuc with Jill-Jênn Vie

Course in a hurry

Super fast track to learn the basics of deep learning from scratch:

For contributors

Join the GitHub repo dataflowr and make a pull request. What are pull requests?

Thanks to Daniel Huynh, Eric Daoud, Simon Coste

Materials from this site are used for courses at ENS and X.

\ No newline at end of file diff --git a/modules/0-julia-setup/index.html b/modules/0-julia-setup/index.html index 52d4d3f..919328e 100644 --- a/modules/0-julia-setup/index.html +++ b/modules/0-julia-setup/index.html @@ -1,4 +1,4 @@ - Dataflowr - Deep Learning DIY

Projects in Julia

Even for a personal project, we recommend making a simple Julia package (just as in Python you would use a virtual environment). This is a simple tutorial to help you code an app in Julia.

Prerequisite

You need to have Julia installed and a GitHub account.

Creating the Julia Package

We'll be using PkgSkeleton.jl, which simplifies the creation of packages. First check your git configuration (as it will be used to create the package) with:

git config --list
+ Dataflowr - Deep Learning DIY

Projects in Julia

Even for a personal project, we recommend making a simple Julia package (just as in Python you would use a virtual environment). This is a simple tutorial to help you code an app in Julia.

Prerequisite

You need to have Julia installed and a GitHub account.

Creating the Julia Package

We'll be using PkgSkeleton.jl, which simplifies the creation of packages. First check your git configuration (as it will be used to create the package) with:

git config --list

You should see your user.name, your user.email and github.user; if not, set them, for example:

git config --global user.name "firstname lastname"
 git config --global user.email "bla.bla@domain.ext"
@@ -35,7 +35,7 @@ 


diff --git a/modules/0-sotfware-installation/index.html b/modules/0-sotfware-installation/index.html index 7d16b8d..d971a92 100644 --- a/modules/0-sotfware-installation/index.html +++ b/modules/0-sotfware-installation/index.html @@ -1,4 +1,4 @@ - Dataflowr - Deep Learning DIY

Setup

Running the notebooks locally

To run the notebooks locally, we recommend the following procedure:

  • First clone the GitHub repository containing the notebooks. The following command will create a directory notebooks with all the files from the repository inside:

$ git clone https://github.com/dataflowr/notebooks.git
+ Dataflowr - Deep Learning DIY

Setup

Running the notebooks locally

To run the notebooks locally, we recommend the following procedure:

  • First clone the GitHub repository containing the notebooks. The following command will create a directory notebooks with all the files from the repository inside:

$ git clone https://github.com/dataflowr/notebooks.git
diff --git a/modules/1-intro-general-overview/index.html b/modules/1-intro-general-overview/index.html index 8e86c5a..a1da440 100644 --- a/modules/1-intro-general-overview/index.html +++ b/modules/1-intro-general-overview/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 1 - Introduction & General Overview

Table of Contents

Introduction & General Overview


0:00 Intro
0:31 Goal of this lecture
2:08 What is deep learning?
7:06 Why deep learning now?
9:33 Deep learning pipeline
12:17 General overview
16:02 Organization of the course
18:24 A first example in Colab (setting)
19:35 Dogs vs cats (data wrangling)
25:50 Data processing (dataset and dataloader)
40:51 VGG model
45:55 Modifying the last layer
49:50 Choosing your loss and optimizer for training
57:40 Precomputing features
1:03:39 Qualitative analysis

Slides and Notebook

Practicals

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 1 - Introduction & General Overview

Table of Contents

Introduction & General Overview


0:00 Intro
0:31 Goal of this lecture
2:08 What is deep learning?
7:06 Why deep learning now?
9:33 Deep learning pipeline
12:17 General overview
16:02 Organization of the course
18:24 A first example in Colab (setting)
19:35 Dogs vs cats (data wrangling)
25:50 Data processing (dataset and dataloader)
40:51 VGG model
45:55 Modifying the last layer
49:50 Choosing your loss and optimizer for training
57:40 Precomputing features
1:03:39 Qualitative analysis

Slides and Notebook

Practicals

\ No newline at end of file diff --git a/modules/10-generative-adversarial-networks/index.html b/modules/10-generative-adversarial-networks/index.html index 6378705..da53f10 100644 --- a/modules/10-generative-adversarial-networks/index.html +++ b/modules/10-generative-adversarial-networks/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 10 - Generative Adversarial Networks

Table of Contents

Generative Adversarial Networks


0:00 Recap
0:15 Presentation of GANs
1:49 GAN learning
4:13 Learning the discriminator
6:16 Learning the generator
7:25 A trick for learning the generator
10:00 GAN for 2d-point clouds
11:51 Training loop in PyTorch
15:08 Loss curves
16:12 Generation with GANs
17:15 Mode collapse
20:00 Conditional GAN
21:15 InfoGAN
22:54 Deep convolutional GAN
25:45 Practicals
28:38 Non convergence for GANs
33:00 Coding a conditional GAN
39:13 Coding an InfoGAN
43:35 Examples of failures

Slides

Practicals

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 10 - Generative Adversarial Networks

Table of Contents

Generative Adversarial Networks


0:00 Recap
0:15 Presentation of GANs
1:49 GAN learning
4:13 Learning the discriminator
6:16 Learning the generator
7:25 A trick for learning the generator
10:00 GAN for 2d-point clouds
11:51 Training loop in PyTorch
15:08 Loss curves
16:12 Generation with GANs
17:15 Mode collapse
20:00 Conditional GAN
21:15 InfoGAN
22:54 Deep convolutional GAN
25:45 Practicals
28:38 Non convergence for GANs
33:00 Coding a conditional GAN
39:13 Coding an InfoGAN
43:35 Examples of failures

Slides

Practicals

\ No newline at end of file diff --git a/modules/11a-recurrent-neural-networks-theory/index.html b/modules/11a-recurrent-neural-networks-theory/index.html index b751e6b..947167f 100644 --- a/modules/11a-recurrent-neural-networks-theory/index.html +++ b/modules/11a-recurrent-neural-networks-theory/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 11a - Recurrent Neural Networks theory

Table of Contents

Theory of RNNs


0:00 Recap
0:52 Introduction to RNNs
1:17 1D convolutional networks for sequences
2:16 Various tasks for RNNs
5:15 Theory of RNN
7:59 Backprop for RNN
10:30 A binary classification problem for sequences
17:17 Elman network
21:02 Training RNN
22:51 Results for Elman network
24:22 Gating for RNN
28:10 Gated RNN in PyTorch
29:27 Results for gated RNN
30:12 LSTM and GRU
34:11 Equations for GRU
37:23 Equations for LSTM
40:31 LSTM in PyTorch
42:44 Results for LSTM
43:43 Empirical results for LSTM and GRU

Slides

References

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 11a - Recurrent Neural Networks theory

Table of Contents

Theory of RNNs


0:00 Recap
0:52 Introduction to RNNs
1:17 1D convolutional networks for sequences
2:16 Various tasks for RNNs
5:15 Theory of RNN
7:59 Backprop for RNN
10:30 A binary classification problem for sequences
17:17 Elman network
21:02 Training RNN
22:51 Results for Elman network
24:22 Gating for RNN
28:10 Gated RNN in PyTorch
29:27 Results for gated RNN
30:12 LSTM and GRU
34:11 Equations for GRU
37:23 Equations for LSTM
40:31 LSTM in PyTorch
42:44 Results for LSTM
43:43 Empirical results for LSTM and GRU

Slides

References

\ No newline at end of file diff --git a/modules/11b-recurrent-neural-networks-practice/index.html b/modules/11b-recurrent-neural-networks-practice/index.html index bdf0da8..aa07478 100644 --- a/modules/11b-recurrent-neural-networks-practice/index.html +++ b/modules/11b-recurrent-neural-networks-practice/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 11b - Recurrent Neural Networks practice

Table of Contents

Theory of RNNs


0:00 Generating the dataset for binary classification of parentheses
4:56 Elman network
11:25 RNN with gating
14:06 LSTM
18:33 Be careful with errors given on the training set!

Notebook

Practicals

  • notebook (or opened in colab) for predicting engine failure with RNN.

References

RNNs can generate bounded hierarchical languages with optimal memory (2020) John Hewitt, Michael Hahn, Surya Ganguli, Percy Liang, Christopher D. Manning arXiv:2010.07515

Self-Attention Networks Can Process Bounded Hierarchical Languages (2021) Shunyu Yao, Binghui Peng, Christos Papadimitriou, Karthik Narasimhan arXiv:2105.11115

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 11b - Recurrent Neural Networks practice

Table of Contents

Theory of RNNs


0:00 Generating the dataset for binary classification of parentheses
4:56 Elman network
11:25 RNN with gating
14:06 LSTM
18:33 Be careful with errors given on the training set!

Notebook

Practicals

  • notebook (or opened in colab) for predicting engine failure with RNN.

References

RNNs can generate bounded hierarchical languages with optimal memory (2020) John Hewitt, Michael Hahn, Surya Ganguli, Percy Liang, Christopher D. Manning arXiv:2010.07515

Self-Attention Networks Can Process Bounded Hierarchical Languages (2021) Shunyu Yao, Binghui Peng, Christos Papadimitriou, Karthik Narasimhan arXiv:2105.11115

\ No newline at end of file diff --git a/modules/11c-batches-with-sequences/index.html b/modules/11c-batches-with-sequences/index.html index dc8aa5f..6514f48 100644 --- a/modules/11c-batches-with-sequences/index.html +++ b/modules/11c-batches-with-sequences/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 11c - Batches with sequences in Pytorch

Table of Contents

Pytorch tutorial on batch for sequences


0:00 Presentation
2:15 Step 1: Construct Vocabulary
2:50 Step 2: Load indexed data (list of instances, where each instance is list of character indices)
3:45 Step 3: Make Model
4:50 Step 4: Pad instances with 0s till max length sequence
5:47 Step 5: Sort instances by sequence length in descending order
6:55 Step 6: Embed the instances
9:10 Step 7: Call pack_padded_sequence with embedded instances and sequence lengths
12:41 Step 8: Forward with LSTM
14:38 Step 9: Call unpack_padded_sequences if required / or just pick last hidden vector

Notebook

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 11c - Batches with sequences in Pytorch

Table of Contents

Pytorch tutorial on batch for sequences


0:00 Presentation
2:15 Step 1: Construct Vocabulary
2:50 Step 2: Load indexed data (list of instances, where each instance is list of character indices)
3:45 Step 3: Make Model
4:50 Step 4: Pad instances with 0s till max length sequence
5:47 Step 5: Sort instances by sequence length in descending order
6:55 Step 6: Embed the instances
9:10 Step 7: Call pack_padded_sequence with embedded instances and sequence lengths
12:41 Step 8: Forward with LSTM
14:38 Step 9: Call unpack_padded_sequences if required / or just pick last hidden vector
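
A minimal sketch of steps 4 to 9 above on a toy batch of integer-encoded sequences (the data, vocabulary size and dimensions are illustrative):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

seqs = [torch.tensor([4, 2, 7, 1]), torch.tensor([5, 3]), torch.tensor([6, 2, 9])]
lengths = torch.tensor([len(s) for s in seqs])

padded = pad_sequence(seqs, batch_first=True, padding_value=0)   # step 4: pad with 0s to the max length
lengths, order = lengths.sort(descending=True)                   # step 5: sort by decreasing length
padded = padded[order]

embedding = nn.Embedding(num_embeddings=10, embedding_dim=8, padding_idx=0)
embedded = embedding(padded)                                     # step 6: shape (batch, max_len, 8)

packed = pack_padded_sequence(embedded, lengths, batch_first=True)    # step 7
lstm = nn.LSTM(input_size=8, hidden_size=16, batch_first=True)
packed_out, (h_n, c_n) = lstm(packed)                            # step 8
out, out_lengths = pad_packed_sequence(packed_out, batch_first=True)  # step 9, or just use h_n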

Notebook

\ No newline at end of file diff --git a/modules/12-attention/index.html b/modules/12-attention/index.html index fc845e6..d5f16d7 100644 --- a/modules/12-attention/index.html +++ b/modules/12-attention/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 12 - Attention and Transformers

Table of Contents

Attention with RNNs

The first attention mechanism was proposed in Neural Machine Translation by Jointly Learning to Align and Translate by Dzmitry Bahdanau, Kyunghyun Cho, Yoshua Bengio (presented at ICLR 2015).

The task considered is English-to-French translation, and the attention mechanism is proposed to extend a seq2seq architecture by adding a context vector $c_i$ in the RNN decoder, so that the hidden states of the decoder are computed recursively as $s_i = f(s_{i-1}, y_{i-1}, c_i)$, where $y_{i-1}$ is the previously predicted token and predictions are made in a probabilistic manner as $y_i \sim g(y_{i-1}, s_i, c_i)$, where $s_i$ and $c_i$ are the current hidden state and context of the decoder.

Now the main novelty is the introduction of the context $c_i$, which is a weighted average of all the hidden states of the encoder: $c_i = \sum_{j=1}^T \alpha_{i,j} h_j$, where $T$ is the length of the input sequence, $h_1, \dots, h_T$ are the corresponding hidden states of the encoder and $\sum_j \alpha_{i,j} = 1$. Hence the context allows passing direct information from the 'relevant' part of the input to the decoder. The coefficients $(\alpha_{i,j})_{j=1}^T$ are computed from the current hidden state of the decoder $s_{i-1}$ and all the hidden states of the encoder $(h_1, \dots, h_T)$ as explained below (taken from the original paper):

PyTorch implementation

In Attention for seq2seq, you can play with a simple model and code the attention mechanism proposed in the paper. For the alignment network $a$ (used to define the coefficients $\alpha_{i,j} = \text{softmax}_j(a(s_{i-1}, h_j))$), we take an MLP with $\tanh$ activations.

You will learn about seq2seq and teacher forcing for RNNs, and build the attention mechanism. To simplify things, we do not deal with batches (see Batches with sequences in Pytorch for more on that). The solution for this practical is provided in Attention for seq2seq - solution.

Note that each $\alpha_{i,j}$ is a real number, so we can display the matrix of the $\alpha_{i,j}$'s, where $j$ ranges over the input tokens and $i$ over the output tokens, see below (taken from the paper):

(Self-)Attention in Transformers

We now describe the attention mechanism proposed in Attention Is All You Need by Vaswani et al. First, we recall basic notions from retrieval systems (query/key/value), illustrated by an example: searching for videos on YouTube. In this example, the query is the text in the search bar, the keys are the metadata associated with the videos, and the videos themselves are the values. Hence a score can be computed from the query and all the keys. Finally, the matched video with the highest score is returned.

We see that we can formalize this process as follows: if $Q_s$ is the current query and $K_t$ and $V_t$ are all the keys and values in the database, we return

$$Y_s = \sum_{t=1}^T \text{softmax}_t(\text{score}(Q_s, K_t))\, V_t,$$

where $\sum_{t=1}^T \text{softmax}_t(\text{score}(Q_s, K_t)) = 1$.

Note that this formalism allows us to recover the way contexts were computed above (where the score function was called the alignment network). Now, we will change the score function and consider dot-product attention: $\text{score}(Q_s, K_t) = \frac{Q_s^T K_t}{\sqrt{d}}$. Note that for this definition to make sense, both the query $Q_s$ and the key $K_t$ need to live in the same space, and $d$ is the dimension of this space.

Given $s$ inputs in $\mathbb{R}^{d_{\text{in}}}$ denoted by a matrix $X \in \mathbb{R}^{d_{\text{in}} \times s}$ and a database containing $t$ samples in $\mathbb{R}^{d'}$ denoted by a matrix $X' \in \mathbb{R}^{d' \times t}$, we define:

$$\text{the queries: } Q = W_Q X, \text{ with } W_Q \in \mathbb{R}^{k \times d_{\text{in}}},$$
$$\text{the keys: } K = W_K X', \text{ with } W_K \in \mathbb{R}^{k \times d'},$$
$$\text{the values: } V = W_V X', \text{ with } W_V \in \mathbb{R}^{d_{\text{out}} \times d'}.$$

Now self-attention is simply obtained with $X = X'$ (so that $d' = d_{\text{in}}$) and $d_{\text{in}} = d_{\text{out}} = d$. In summary, a self-attention layer can take as input any tensor of the form $X \in \mathbb{R}^{d \times T}$ (for any $T$), has parameters

$$W_Q \in \mathbb{R}^{k \times d}, \quad W_K \in \mathbb{R}^{k \times d}, \quad W_V \in \mathbb{R}^{d \times d},$$

and produces $Y \in \mathbb{R}^{d \times T}$ (with the same $d$ and $T$ as the input). Here $d$ is the dimension of the input and $k$ is a hyper-parameter of the self-attention layer:

$$Y_s = \sum_{t=1}^T \text{softmax}_t\left(\frac{X_s^T W_Q^T W_K X_t}{\sqrt{k}}\right) W_V X_t,$$

with the convention that $X_t \in \mathbb{R}^d$ (resp. $Y_s \in \mathbb{R}^d$) is the $t$-th column of $X$ (resp. the $s$-th column of $Y$). Note that the notation $\text{softmax}_t(.)$ might be a bit confusing. Recall that $\text{softmax}$ always takes as input a vector and returns a (normalized) vector. In practice, most of the time we are dealing with batches, so that the $\text{softmax}$ function takes as input a matrix (or tensor) and we need to normalize along the right axis! Named tensor notation (see below) deals with this notational issue. I also find the following interpretation helpful:

Mental model for self-attention: self-attention interpreted as taking an expectation

$$y_s = \sum_{t=1}^T p(x_t|x_s)\, v(x_t) = \mathbb{E}[v(x)|x_s], \quad \text{with } p(x_t|x_s) = \frac{\exp(q(x_s) k(x_t))}{\sum_r \exp(q(x_s) k(x_r))},$$

where the mappings $q(.)$, $k(.)$ and $v(.)$ represent query, key and value.

Multi-head attention combines several such operations in parallel; $Y$ is then the concatenation of the results along the feature dimension, to which one more linear transformation is applied.

Transformer block

To finish the description of a transformer block, we need to define two last layers: Layer Norm and Feed Forward Network.

The Layer Norm used in the transformer block is particularly simple, as it acts on vectors and standardizes each vector as follows: for $x \in \mathbb{R}^d$, we define

$$\text{mean}(x) = \frac{1}{d}\sum_{i=1}^d x_i \in \mathbb{R}, \qquad \text{std}(x)^2 = \frac{1}{d}\sum_{i=1}^d (x_i - \text{mean}(x))^2 \in \mathbb{R},$$

and then the Layer Norm has two parameters $\gamma, \beta \in \mathbb{R}^d$ and

$$LN(x) = \gamma \cdot \frac{x - \text{mean}(x)}{\text{std}(x)} + \beta,$$

where we used the natural broadcasting rule for subtracting the mean and dividing by the std, and $\cdot$ is component-wise multiplication.

A Feed Forward Network is an MLP acting on vectors: for $x \in \mathbb{R}^d$, we define

$$FFN(x) = \max(0, xW_1 + b_1)W_2 + b_2,$$

where $W_1 \in \mathbb{R}^{d \times h}$, $b_1 \in \mathbb{R}^h$, $W_2 \in \mathbb{R}^{h \times d}$, $b_2 \in \mathbb{R}^d$.

Each of these layers is applied on each of the inputs given to the transformer block as depicted below:

Note that this block is equivariant: if we permute the inputs, then the outputs are permuted with the same permutation. As a result, the order of the inputs is invisible to the transformer block and cannot be used. The important notion of positional encoding allows us to take order into account: a deterministic, unique encoding of each time step is added to the input tokens.

Transformers using Named Tensor Notation

In Transformers using Named Tensor Notation, we derive the formal equations for the Transformer block using named tensor notation.

Hacking a simple Transformer block

Now is the time to have fun building a simple transformer block and to think like transformers (open in colab).

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 12 - Attention and Transformers

Table of Contents

Attention with RNNs

The first attention mechanism was proposed in Neural Machine Translation by Jointly Learning to Align and Translate by Dzmitry Bahdanau, Kyunghyun Cho, Yoshua Bengio (presented at ICLR 2015).

The task considered is English-to-French translation, and the attention mechanism is proposed to extend a seq2seq architecture by adding a context vector $c_i$ in the RNN decoder, so that the hidden states of the decoder are computed recursively as $s_i = f(s_{i-1}, y_{i-1}, c_i)$, where $y_{i-1}$ is the previously predicted token and predictions are made in a probabilistic manner as $y_i \sim g(y_{i-1}, s_i, c_i)$, where $s_i$ and $c_i$ are the current hidden state and context of the decoder.

Now the main novelty is the introduction of the context $c_i$, which is a weighted average of all the hidden states of the encoder: $c_i = \sum_{j=1}^T \alpha_{i,j} h_j$, where $T$ is the length of the input sequence, $h_1, \dots, h_T$ are the corresponding hidden states of the encoder and $\sum_j \alpha_{i,j} = 1$. Hence the context allows passing direct information from the 'relevant' part of the input to the decoder. The coefficients $(\alpha_{i,j})_{j=1}^T$ are computed from the current hidden state of the decoder $s_{i-1}$ and all the hidden states of the encoder $(h_1, \dots, h_T)$ as explained below (taken from the original paper):

PyTorch implementation

In Attention for seq2seq, you can play with a simple model and code the attention mechanism proposed in the paper. For the alignment network $a$ (used to define the coefficients $\alpha_{i,j} = \text{softmax}_j(a(s_{i-1}, h_j))$), we take an MLP with $\tanh$ activations.
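
As a rough sketch of what such an alignment network can look like (the names, shapes and exact parameterization are illustrative and need not match the solution of the practical):

import torch
import torch.nn as nn
import torch.nn.functional as F

class AlignmentNetwork(nn.Module):
    # a(s_{i-1}, h_j): an MLP with tanh producing one score per encoder hidden state
    def __init__(self, hidden_dim):
        super().__init__()
        self.W = nn.Linear(2 * hidden_dim, hidden_dim)
        self.v = nn.Linear(hidden_dim, 1, bias=False)

    def forward(self, s_prev, H):              # s_prev: (hidden,), H: (T, hidden)
        s = s_prev.expand_as(H)                # repeat the decoder state for each h_j
        scores = self.v(torch.tanh(self.W(torch.cat([s, H], dim=-1)))).squeeze(-1)   # (T,)
        alpha = F.softmax(scores, dim=-1)      # alpha_{i,j}, sums to 1 over j
        context = alpha @ H                    # c_i = sum_j alpha_{i,j} h_j
        return context, alpha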

You will learn about seq2seq and teacher forcing for RNNs, and build the attention mechanism. To simplify things, we do not deal with batches (see Batches with sequences in Pytorch for more on that). The solution for this practical is provided in Attention for seq2seq - solution.

Note that each $\alpha_{i,j}$ is a real number, so we can display the matrix of the $\alpha_{i,j}$'s, where $j$ ranges over the input tokens and $i$ over the output tokens, see below (taken from the paper):

(Self-)Attention in Transformers

We now describe the attention mechanism proposed in Attention Is All You Need by Vaswani et al. First, we recall basic notions from retrieval systems (query/key/value), illustrated by an example: searching for videos on YouTube. In this example, the query is the text in the search bar, the keys are the metadata associated with the videos, and the videos themselves are the values. Hence a score can be computed from the query and all the keys. Finally, the matched video with the highest score is returned.

We see that we can formalize this process as follows: if $Q_s$ is the current query and $K_t$ and $V_t$ are all the keys and values in the database, we return

$$Y_s = \sum_{t=1}^T \text{softmax}_t(\text{score}(Q_s, K_t))\, V_t,$$

where $\sum_{t=1}^T \text{softmax}_t(\text{score}(Q_s, K_t)) = 1$.

Note that this formalism allows us to recover the way contexts were computed above (where the score function was called the alignment network). Now, we will change the score function and consider dot-product attention: $\text{score}(Q_s, K_t) = \frac{Q_s^T K_t}{\sqrt{d}}$. Note that for this definition to make sense, both the query $Q_s$ and the key $K_t$ need to live in the same space, and $d$ is the dimension of this space.

Given $s$ inputs in $\mathbb{R}^{d_{\text{in}}}$ denoted by a matrix $X \in \mathbb{R}^{d_{\text{in}} \times s}$ and a database containing $t$ samples in $\mathbb{R}^{d'}$ denoted by a matrix $X' \in \mathbb{R}^{d' \times t}$, we define:

$$\text{the queries: } Q = W_Q X, \text{ with } W_Q \in \mathbb{R}^{k \times d_{\text{in}}},$$
$$\text{the keys: } K = W_K X', \text{ with } W_K \in \mathbb{R}^{k \times d'},$$
$$\text{the values: } V = W_V X', \text{ with } W_V \in \mathbb{R}^{d_{\text{out}} \times d'}.$$

Now self-attention is simply obtained with $X = X'$ (so that $d' = d_{\text{in}}$) and $d_{\text{in}} = d_{\text{out}} = d$. In summary, a self-attention layer can take as input any tensor of the form $X \in \mathbb{R}^{d \times T}$ (for any $T$), has parameters

$$W_Q \in \mathbb{R}^{k \times d}, \quad W_K \in \mathbb{R}^{k \times d}, \quad W_V \in \mathbb{R}^{d \times d},$$

and produces $Y \in \mathbb{R}^{d \times T}$ (with the same $d$ and $T$ as the input). Here $d$ is the dimension of the input and $k$ is a hyper-parameter of the self-attention layer:

$$Y_s = \sum_{t=1}^T \text{softmax}_t\left(\frac{X_s^T W_Q^T W_K X_t}{\sqrt{k}}\right) W_V X_t,$$

with the convention that $X_t \in \mathbb{R}^d$ (resp. $Y_s \in \mathbb{R}^d$) is the $t$-th column of $X$ (resp. the $s$-th column of $Y$). Note that the notation $\text{softmax}_t(.)$ might be a bit confusing. Recall that $\text{softmax}$ always takes as input a vector and returns a (normalized) vector. In practice, most of the time we are dealing with batches, so that the $\text{softmax}$ function takes as input a matrix (or tensor) and we need to normalize along the right axis! Named tensor notation (see below) deals with this notational issue. I also find the following interpretation helpful:

Mental model for self-attention: self-attention interpreted as taking an expectation

$$y_s = \sum_{t=1}^T p(x_t|x_s)\, v(x_t) = \mathbb{E}[v(x)|x_s], \quad \text{with } p(x_t|x_s) = \frac{\exp(q(x_s) k(x_t))}{\sum_r \exp(q(x_s) k(x_r))},$$

where the mappings $q(.)$, $k(.)$ and $v(.)$ represent query, key and value.
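
A minimal single-head sketch of this operation in PyTorch; note that tokens are stored here as rows of a (T, d) tensor, whereas the text stacks them as columns of X:

import torch
import torch.nn as nn
import torch.nn.functional as F

class SelfAttention(nn.Module):
    # single-head dot-product self-attention: W_Q, W_K in R^{k x d} and W_V in R^{d x d}
    def __init__(self, d, k):
        super().__init__()
        self.W_q = nn.Linear(d, k, bias=False)
        self.W_k = nn.Linear(d, k, bias=False)
        self.W_v = nn.Linear(d, d, bias=False)
        self.k = k

    def forward(self, x):                      # x: (T, d), one row per token
        Q, K, V = self.W_q(x), self.W_k(x), self.W_v(x)
        scores = Q @ K.T / self.k ** 0.5       # (T, T), dot-product scores divided by sqrt(k)
        attn = F.softmax(scores, dim=-1)       # normalize over t, the second index
        return attn @ V                        # (T, d)

y = SelfAttention(d=32, k=16)(torch.randn(5, 32))   # T=5 tokens of dimension d=32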

Multi-head attention combines several such operations in parallel; $Y$ is then the concatenation of the results along the feature dimension, to which one more linear transformation is applied.

Transformer block

To finish the description of a transformer block, we need to define two last layers: Layer Norm and Feed Forward Network.

The Layer Norm used in the transformer block is particularly simple, as it acts on vectors and standardizes each vector as follows: for $x \in \mathbb{R}^d$, we define

$$\text{mean}(x) = \frac{1}{d}\sum_{i=1}^d x_i \in \mathbb{R}, \qquad \text{std}(x)^2 = \frac{1}{d}\sum_{i=1}^d (x_i - \text{mean}(x))^2 \in \mathbb{R},$$

and then the Layer Norm has two parameters $\gamma, \beta \in \mathbb{R}^d$ and

$$LN(x) = \gamma \cdot \frac{x - \text{mean}(x)}{\text{std}(x)} + \beta,$$

where we used the natural broadcasting rule for subtracting the mean and dividing by the std, and $\cdot$ is component-wise multiplication.

A Feed Forward Network is an MLP acting on vectors: for $x \in \mathbb{R}^d$, we define

$$FFN(x) = \max(0, xW_1 + b_1)W_2 + b_2,$$

where $W_1 \in \mathbb{R}^{d \times h}$, $b_1 \in \mathbb{R}^h$, $W_2 \in \mathbb{R}^{h \times d}$, $b_2 \in \mathbb{R}^d$.

Each of these layers is applied on each of the inputs given to the transformer block as depicted below:

Note that this block is equivariant: if we permute the inputs, then the outputs are permuted with the same permutation. As a result, the order of the inputs is invisible to the transformer block and cannot be used. The important notion of positional encoding allows us to take order into account: a deterministic, unique encoding of each time step is added to the input tokens.
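
Putting the pieces together, here is a minimal sketch of a transformer block, assuming the standard residual connections around the attention and feed-forward sub-layers (not written explicitly above) and reusing PyTorch's built-in multi-head attention and LayerNorm:

import torch
import torch.nn as nn

class TransformerBlock(nn.Module):
    def __init__(self, d, h, num_heads=1):
        super().__init__()
        self.attn = nn.MultiheadAttention(embed_dim=d, num_heads=num_heads, batch_first=True)
        self.ln1 = nn.LayerNorm(d)                 # the (gamma, beta) parameters of the text
        self.ln2 = nn.LayerNorm(d)
        self.ffn = nn.Sequential(                  # FFN(x) = max(0, x W1 + b1) W2 + b2
            nn.Linear(d, h), nn.ReLU(), nn.Linear(h, d))

    def forward(self, x):                          # x: (batch, T, d)
        a, _ = self.attn(x, x, x)                  # self-attention: queries = keys = values = x
        x = self.ln1(x + a)                        # residual connection then Layer Norm
        return self.ln2(x + self.ffn(x))           # same pattern for the feed-forward part

y = TransformerBlock(d=32, h=64)(torch.randn(2, 5, 32))   # batch of 2 sequences, T=5, d=32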

Transformers using Named Tensor Notation

In Transformers using Named Tensor Notation, we derive the formal equations for the Transformer block using named tensor notation.

Hacking a simple Transformer block

Now is the time to have fun building a simple transformer block and to think like transformers (open in colab).

\ No newline at end of file diff --git a/modules/12-intro-julia/index.html b/modules/12-intro-julia/index.html index 0b2c55b..549ac6b 100644 --- a/modules/12-intro-julia/index.html +++ b/modules/12-intro-julia/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module - Introduction to Julia: Automatic differentiation with dual numbers

Table of Contents

Introduction to Julia: Automatic differentiation with dual numbers


0:00 Dual numbers in Julia
8:47 Using conversion and promotion
13:25 Automatic differentiation for polynomials
17:35 Using Babylonian algorithm for the square root
24:27 Checking the derivative by hand
25:37 Pkg ForwardDiff.jl

Notebook

  • notebook (you need to install Julia) or use:

Binder

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module - Introduction to Julia: Automatic differentiation with dual numbers

Table of Contents

Introduction to Julia: Automatic differentiation with dual numbers


0:00 Dual numbers in Julia
8:47 Using conversion and promotion
13:25 Automatic differentiation for polynomials
17:35 Using Babylonian algorithm for the square root
24:27 Checking the derivative by hand
25:37 Pkg ForwardDiff.jl

Notebook

  • notebook (you need to install Julia) or use:

Binder

\ No newline at end of file diff --git a/modules/13-siamese/index.html b/modules/13-siamese/index.html index 2e8d316..d11c11c 100644 --- a/modules/13-siamese/index.html +++ b/modules/13-siamese/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 13 - Siamese Networks and Representation Learning

Table of Contents

Siamese Networks and Representation Learning


0:57 Siamese networks for face recognition
4:21 Siamese architecture
6:09 Contrastive loss
11:23 Training siamese networks
14:26 Triplet architecture
15:00 Triplet loss
17:16 Training with triplet loss
17:45 Pytorch code
20:20 Hard negative sampling
22:55 Applications
31:00 N-pair loss
32:06 Histogram loss
33:35 Prototypical networks
36:10 Take-away

Slides and Notebook

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 13 - Siamese Networks and Representation Learning

Table of Contents

Siamese Networks and Representation Learning


0:57 Siamese networks for face recognition
4:21 Siamese architecture
6:09 Contrastive loss
11:23 Training siamese networks
14:26 Triplet architecture
15:00 Triplet loss
17:16 Training with triplet loss
17:45 Pytorch code
20:20 Hard negative sampling
22:55 Applications
31:00 N-pair loss
32:06 Histogram loss
33:35 Prototypical networks
36:10 Take-away

Slides and Notebook

\ No newline at end of file diff --git a/modules/14a-depth/index.html b/modules/14a-depth/index.html index 1e354c8..7498796 100644 --- a/modules/14a-depth/index.html +++ b/modules/14a-depth/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 14a - The Benefits of Depth

Table of Contents

Benefits of Depth

Slides

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 14a - The Benefits of Depth

Table of Contents

Benefits of Depth

Slides

\ No newline at end of file diff --git a/modules/14b-depth/index.html b/modules/14b-depth/index.html index afcc02e..2b4a518 100644 --- a/modules/14b-depth/index.html +++ b/modules/14b-depth/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 14b - The Problems with Depth

Table of Contents

The Problems with Depth

Slides

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 14b - The Problems with Depth

Table of Contents

The Problems with Depth

Slides

\ No newline at end of file diff --git a/modules/15-dropout/index.html b/modules/15-dropout/index.html index e2e2b3a..bed8450 100644 --- a/modules/15-dropout/index.html +++ b/modules/15-dropout/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 15 - Dropout

Table of Contents

Dropout

Slides and Notebook

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 15 - Dropout

Table of Contents

Dropout

Slides and Notebook

\ No newline at end of file diff --git a/modules/16-batchnorm/index.html b/modules/16-batchnorm/index.html index e612300..8dfd96f 100644 --- a/modules/16-batchnorm/index.html +++ b/modules/16-batchnorm/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 16 - Batchnorm

Table of Contents

Batchnorm

Slides and Notebook

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 16 - Batchnorm

Table of Contents

Batchnorm

Slides and Notebook

\ No newline at end of file diff --git a/modules/17-resnets/index.html b/modules/17-resnets/index.html index b365bb0..1a8d7ec 100644 --- a/modules/17-resnets/index.html +++ b/modules/17-resnets/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 17 - Resnets

Table of Contents

Resnets

Slides

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 17 - Resnets

Table of Contents

Resnets

Slides

\ No newline at end of file diff --git a/modules/18a-diffusion/index.html b/modules/18a-diffusion/index.html index 83dd329..d0adf02 100644 --- a/modules/18a-diffusion/index.html +++ b/modules/18a-diffusion/index.html @@ -1,4 +1,4 @@ - ddpm

Module 18a - Denoising Diffusion Probabilistic Models

This module presents the work: Denoising Diffusion Probabilistic Models by Jonathan Ho, Ajay Jain, Pieter Abbeel (2020). It starts with a description of the algorithm, then provides some notebooks to implement it on MNIST and CIFAR10 and finishes with some technical details.

Table of Contents

Algorithm

Forward diffusion process

Given a schedule $\beta_1 < \beta_2 < \dots < \beta_T$,

$$q(x_t|x_{t-1}) = \mathcal{N}(x_t; \sqrt{1-\beta_t}\, x_{t-1}, \beta_t I), \qquad q(x_{1:T}|x_0) = \prod_{t=1}^T q(x_t|x_{t-1}).$$

We define $\alpha_t = 1-\beta_t$ and $\overline{\alpha}_t = \prod_{i=1}^t \alpha_i$; then we have

$$\begin{aligned} x_t &= \sqrt{\alpha_t}\, x_{t-1} + \sqrt{1-\alpha_t}\,\epsilon_{t-1}, \text{ with } \epsilon_{t-1}\sim\mathcal{N}(0,I)\\ &= \sqrt{\alpha_t\alpha_{t-1}}\, x_{t-2} + \sqrt{\alpha_t(1-\alpha_{t-1})}\,\epsilon_{t-2} + \sqrt{1-\alpha_t}\,\epsilon_{t-1}\\ &= \sqrt{\alpha_t\alpha_{t-1}}\, x_{t-2} + \sqrt{1-\alpha_t\alpha_{t-1}}\,\tilde{\epsilon}_t. \end{aligned}$$

Hence, we have

$$x_t = \sqrt{\overline{\alpha}_t}\, x_0 + \sqrt{1-\overline{\alpha}_t}\,\epsilon.$$
class DDPM(nn.Module):
+           ddpm  

Module 18a - Denoising Diffusion Probabilistic Models

This module presents the work: Denoising Diffusion Probabilistic Models by Jonathan Ho, Ajay Jain, Pieter Abbeel (2020). It starts with a description of the algorithm, then provides some notebooks to implement it on MNIST and CIFAR10 and finishes with some technical details.

Table of Contents

Algorithm

Forward diffusion process

Given a schedule $\beta_1 < \beta_2 < \dots < \beta_T$,

$$q(x_t|x_{t-1}) = \mathcal{N}(x_t; \sqrt{1-\beta_t}\, x_{t-1}, \beta_t I), \qquad q(x_{1:T}|x_0) = \prod_{t=1}^T q(x_t|x_{t-1}).$$

We define $\alpha_t = 1-\beta_t$ and $\overline{\alpha}_t = \prod_{i=1}^t \alpha_i$; then we have

$$\begin{aligned} x_t &= \sqrt{\alpha_t}\, x_{t-1} + \sqrt{1-\alpha_t}\,\epsilon_{t-1}, \text{ with } \epsilon_{t-1}\sim\mathcal{N}(0,I)\\ &= \sqrt{\alpha_t\alpha_{t-1}}\, x_{t-2} + \sqrt{\alpha_t(1-\alpha_{t-1})}\,\epsilon_{t-2} + \sqrt{1-\alpha_t}\,\epsilon_{t-1}\\ &= \sqrt{\alpha_t\alpha_{t-1}}\, x_{t-2} + \sqrt{1-\alpha_t\alpha_{t-1}}\,\tilde{\epsilon}_t. \end{aligned}$$

Hence, we have

$$x_t = \sqrt{\overline{\alpha}_t}\, x_0 + \sqrt{1-\overline{\alpha}_t}\,\epsilon.$$
class DDPM(nn.Module):
     def __init__(self, network, num_timesteps, 
             beta_start=0.0001, beta_end=0.02, device=device):
         super(DDPM, self).__init__()
@@ -57,4 +57,4 @@
             
         pred_prev_sample = pred_prev_sample + variance
 
-        return pred_prev_sample

Summary: Denoising Diffusion Probabilistic Models

(J. Ho, A. Jain, P. Abbeel 2020)

Given a schedule $\beta_1 < \beta_2 < \dots < \beta_T$, the forward diffusion process is defined by $q(x_t|x_{t-1}) = \mathcal{N}(x_t; \sqrt{1-\beta_t}\, x_{t-1}, \beta_t I)$ and $q(x_{1:T}|x_0) = \prod_{t=1}^T q(x_t|x_{t-1})$.

With $\alpha_t = 1-\beta_t$ and $\overline{\alpha}_t = \prod_{i=1}^t \alpha_i$, we see that, with $\epsilon \sim \mathcal{N}(0,I)$:

$$x_t = \sqrt{\overline{\alpha}_t}\, x_0 + \sqrt{1-\overline{\alpha}_t}\,\epsilon.$$

The law $q(x_{t-1}|x_t, \epsilon)$ is explicit: $q(x_{t-1}|x_t, \epsilon) = \mathcal{N}(x_{t-1}; \mu(x_t, \epsilon, t), \gamma_t I)$ with

$$\mu(x_t, \epsilon, t) = \frac{1}{\sqrt{\alpha_t}}\left(x_t - \frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon\right) \quad\text{and}\quad \gamma_t = \frac{1-\overline{\alpha}_{t-1}}{1-\overline{\alpha}_t}\beta_t.$$

Training: to approximate the reversed diffusion $q(x_{t-1}|x_t)$ by a neural network given by $p_\theta(x_{t-1}|x_t) = \mathcal{N}(x_{t-1}; \mu_\theta(x_t,t), \beta_t I)$ and $p(x_T) \sim \mathcal{N}(0,I)$, we maximize the usual variational bound:

$$\mathbb{E}_{q(x_0)} \ln p_\theta(x_0) \geq L_T + \sum_{t=2}^T L_{t-1} + L_0, \quad\text{with } L_{t-1} = \mathbb{E}_q\left[\frac{1}{2\sigma_t^2}\|\mu_\theta(x_t,t) - \mu(x_t,\epsilon,t)\|^2\right].$$

With the change of variable

$$\mu_\theta(x_t,t) = \frac{1}{\sqrt{\alpha_t}}\left(x_t - \frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon_\theta(x_t,t)\right),$$

ignoring the prefactor and sampling $\tau$ instead of summing over all $t$, the loss is finally

$$\ell(\theta) = \mathbb{E}_\tau \mathbb{E}_\epsilon\left[\|\epsilon - \epsilon_\theta(\sqrt{\overline{\alpha}_\tau}\, x_0 + \sqrt{1-\overline{\alpha}_\tau}\,\epsilon, \tau)\|^2\right].$$

Sampling: to simulate the reversed diffusion with the learned $\epsilon_\theta(x_t,t)$, starting from $x_T \sim \mathcal{N}(0,I)$, iterate for $t = T, \dots, 1$:

$$x_{t-1} = \frac{1}{\sqrt{\alpha_t}}\left(x_t - \frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon_\theta(x_t,t)\right) + \sqrt{\beta_t}\,\epsilon, \quad\text{with } \epsilon \sim \mathcal{N}(0,I).$$

Implementation

MNIST

The training of this notebook on colab takes approximately 20 minutes.

  • ddpm_nano_empty.ipynb is the notebook where you code the DDPM algorithm (a simple UNet is provided for the network $\epsilon_\theta(x,t)$), its training and the sampling. You should get results like this:

CIFAR10

The training of this notebook on colab takes approximately 20 minutes (so do not expect high-quality pictures!). Still, after finetuning on specific classes, we see that the model learns features of the class.

With a bit more training (100 epochs), you can get results like this:

Technical details

Note that the Denoising Diffusion Probabilistic Model is the same for MNIST and CIFAR10; we only change the UNet that learns to reverse the noise. For CIFAR10, we adapt the UNet provided in Module 9b. Indeed, you can still use the DDPM code provided here with other, more complex architectures with self-attention, like this UNet coded by lucidrains, which is the one used in the original paper.

In the paper, the authors used Exponential Moving Average (EMA) on model parameters with a decay factor of 0.999. This is not implemented here to keep the code as simple as possible.

\ No newline at end of file + return pred_prev_sample

Summary: Denoising Diffusion Probabilistic Models

(J. Ho, A. Jain, P. Abbeel 2020)

Given a schedule $\beta_1 < \beta_2 < \dots < \beta_T$, the forward diffusion process is defined by $q(x_t|x_{t-1}) = \mathcal{N}(x_t; \sqrt{1-\beta_t}\, x_{t-1}, \beta_t I)$ and $q(x_{1:T}|x_0) = \prod_{t=1}^T q(x_t|x_{t-1})$.

With $\alpha_t = 1-\beta_t$ and $\overline{\alpha}_t = \prod_{i=1}^t \alpha_i$, we see that, with $\epsilon \sim \mathcal{N}(0,I)$:

$$x_t = \sqrt{\overline{\alpha}_t}\, x_0 + \sqrt{1-\overline{\alpha}_t}\,\epsilon.$$

The law $q(x_{t-1}|x_t, \epsilon)$ is explicit: $q(x_{t-1}|x_t, \epsilon) = \mathcal{N}(x_{t-1}; \mu(x_t, \epsilon, t), \gamma_t I)$ with

$$\mu(x_t, \epsilon, t) = \frac{1}{\sqrt{\alpha_t}}\left(x_t - \frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon\right) \quad\text{and}\quad \gamma_t = \frac{1-\overline{\alpha}_{t-1}}{1-\overline{\alpha}_t}\beta_t.$$

Training: to approximate the reversed diffusion $q(x_{t-1}|x_t)$ by a neural network given by $p_\theta(x_{t-1}|x_t) = \mathcal{N}(x_{t-1}; \mu_\theta(x_t,t), \beta_t I)$ and $p(x_T) \sim \mathcal{N}(0,I)$, we maximize the usual variational bound:

$$\mathbb{E}_{q(x_0)} \ln p_\theta(x_0) \geq L_T + \sum_{t=2}^T L_{t-1} + L_0, \quad\text{with } L_{t-1} = \mathbb{E}_q\left[\frac{1}{2\sigma_t^2}\|\mu_\theta(x_t,t) - \mu(x_t,\epsilon,t)\|^2\right].$$

With the change of variable

$$\mu_\theta(x_t,t) = \frac{1}{\sqrt{\alpha_t}}\left(x_t - \frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon_\theta(x_t,t)\right),$$

ignoring the prefactor and sampling $\tau$ instead of summing over all $t$, the loss is finally

$$\ell(\theta) = \mathbb{E}_\tau \mathbb{E}_\epsilon\left[\|\epsilon - \epsilon_\theta(\sqrt{\overline{\alpha}_\tau}\, x_0 + \sqrt{1-\overline{\alpha}_\tau}\,\epsilon, \tau)\|^2\right].$$

Sampling: to simulate the reversed diffusion with the learned $\epsilon_\theta(x_t,t)$, starting from $x_T \sim \mathcal{N}(0,I)$, iterate for $t = T, \dots, 1$:

$$x_{t-1} = \frac{1}{\sqrt{\alpha_t}}\left(x_t - \frac{1-\alpha_t}{\sqrt{1-\overline{\alpha}_t}}\epsilon_\theta(x_t,t)\right) + \sqrt{\beta_t}\,\epsilon, \quad\text{with } \epsilon \sim \mathcal{N}(0,I).$$
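
The DDPM class shown above implements these formulas; as a condensed illustration, here is a sketch of one training-loss evaluation and one sampling step written directly from the summary, assuming a hypothetical network(x, t) predicting $\epsilon_\theta(x_t, t)$ for image batches of shape (B, C, H, W):

import torch
import torch.nn.functional as F

T = 1000
betas = torch.linspace(1e-4, 0.02, T)          # the schedule beta_1 < ... < beta_T
alphas = 1.0 - betas
alphas_bar = torch.cumprod(alphas, dim=0)      # \bar{alpha}_t

def training_loss(network, x0):
    t = torch.randint(0, T, (x0.shape[0],))    # sample tau uniformly for each image
    eps = torch.randn_like(x0)
    a_bar = alphas_bar[t].view(-1, 1, 1, 1)
    x_t = a_bar.sqrt() * x0 + (1 - a_bar).sqrt() * eps   # forward diffusion in closed form
    return F.mse_loss(network(x_t, t), eps)              # ||eps - eps_theta(x_t, t)||^2

@torch.no_grad()
def sampling_step(network, x_t, t):            # one step of the reversed diffusion, t = T-1, ..., 0
    eps = network(x_t, torch.full((x_t.shape[0],), t))
    mean = (x_t - (1 - alphas[t]) / (1 - alphas_bar[t]).sqrt() * eps) / alphas[t].sqrt()
    noise = torch.randn_like(x_t) if t > 0 else torch.zeros_like(x_t)
    return mean + betas[t].sqrt() * noise      # x_{t-1}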

Implementation

MNIST

The training of this notebook on colab takes approximately 20 minutes.

  • ddpm_nano_empty.ipynb is the notebook where you code the DDPM algorithm (a simple UNet is provided for the network $\epsilon_\theta(x,t)$), its training and the sampling. You should get results like this:

CIFAR10

The training of this notebook on colab takes approximately 20 minutes (so do not expect high-quality pictures!). Still, after finetuning on specific classes, we see that the model learns features of the class.

With a bit more training (100 epochs), you can get results like this:

Technical details

Note that the Denoising Diffusion Probabilistic Model is the same for MNIST and CIFAR10; we only change the UNet that learns to reverse the noise. For CIFAR10, we adapt the UNet provided in Module 9b. Indeed, you can still use the DDPM code provided here with other, more complex architectures with self-attention, like this UNet coded by lucidrains, which is the one used in the original paper.

In the paper, the authors used Exponential Moving Average (EMA) on model parameters with a decay factor of 0.999. This is not implemented here to keep the code as simple as possible.

\ No newline at end of file diff --git a/modules/19-clip/index.html b/modules/19-clip/index.html new file mode 100644 index 0000000..dd0004c --- /dev/null +++ b/modules/19-clip/index.html @@ -0,0 +1 @@ + Dataflowr - Deep Learning DIY

Module 19 - Zero-shot classification with CLIP

Notebook

  • in Zeroshot_with_CLIP.ipynb we build a zero-shot classifier using the pretrained CLIP network and improve its performance with descriptors generated with GPT (a minimal sketch is given below).
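
For reference, a minimal zero-shot classification sketch with the openai/CLIP package (the model name, class names and image path are illustrative; the notebook's descriptor-based variant builds on the same idea):

import torch
import clip
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

classes = ["cat", "dog", "car"]                                     # illustrative class names
image = preprocess(Image.open("example.jpg")).unsqueeze(0).to(device)
text = clip.tokenize([f"a photo of a {c}" for c in classes]).to(device)

with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)

image_features = image_features / image_features.norm(dim=-1, keepdim=True)
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)  # one probability per class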

References

CLIP: Learning Transferable Visual Models From Natural Language Supervision (ICML 2021) Alec Radford et al.

Visual Classification via Description from Large Language Models (ICLR 2023) Sachit Menon and Carl Vondrick

\ No newline at end of file diff --git a/modules/2a-pytorch-tensors/index.html b/modules/2a-pytorch-tensors/index.html index 4b4b995..1fd90df 100644 --- a/modules/2a-pytorch-tensors/index.html +++ b/modules/2a-pytorch-tensors/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 2a - Pytorch tensors

Table of Contents

Pytorch tensors


0:00 Recap
1:43 Introduction to tensors
4:32 Sizes
5:25 Bridge to numpy
11:10 Broadcasting
14:35 Inplace modification
16:30 Shared memory
18:40 Cuda
22:34 CIFAR dataset

Notebook

Quiz

To check your understanding of the material, you can do the quizzes

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 2a - Pytorch tensors

Table of Contents

Pytorch tensors


0:00 Recap
1:43 Introduction to tensors
4:32 Sizes
5:25 Bridge to numpy
11:10 Broadcasting
14:35 Inplace modification
16:30 Shared memory
18:40 Cuda
22:34 CIFAR dataset

Notebook

Quiz

To check your understanding of the material, you can do the quizzes

\ No newline at end of file diff --git a/modules/2b-automatic-differentiation/index.html b/modules/2b-automatic-differentiation/index.html index 2734f35..a5b61ed 100644 --- a/modules/2b-automatic-differentiation/index.html +++ b/modules/2b-automatic-differentiation/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 2b - Automatic differentiation

Table of Contents

Automatic differentiation


0:00 Recap
0:40 A simple example (more in the practicals)
3:44 Pytorch tensor: requires_grad field
6:44 Pytorch backward function
9:05 The chain rule on our example
16:00 Linear regression
18:00 Gradient descent with numpy...
27:30 ... with pytorch tensors
31:30 Using autograd
34:35 Using a neural network (linear layer)
39:50 Using a pytorch optimizer
44:00 algorithm: how automatic differentiation works

Slides and Notebook

Quiz

To check your understanding of automatic differentiation, you can do the quizzes

Practicals

Challenge

Adapt your code to solve the following challenge:

Some small modifications:

  • First modification: we now generate points (xt,yt)(x_t,y_t) where yt=exp(wcos(xt)+b)y_t= \exp(w^*\cos(x_t)+b^*), i.e yty^*_t is obtained by applying a deterministic function to xtx_t with parameters ww^* and bb^*. Our goal is still to recover the parameters ww^* and bb^* from the observations (xt,yt)(x_t,y_t).

  • Second modification: we now generate points (xt,yt)(x_t,y_t) where yt=exp(wcos(pxt)+b)y_t= \exp(w^*\cos(p^*x_t)+b^*), i.e yty^*_t is obtained by applying a deterministic function to xtx_t with parameters pp^*, ww^* and bb^*. Our goal is still to recover the parameters from the observations (xt,yt)(x_t,y_t).

Bonus:

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 2b - Automatic differentiation

Table of Contents

Automatic differentiation


0:00 Recap
0:40 A simple example (more in the practicals)
3:44 Pytorch tensor: requires_grad field
6:44 Pytorch backward function
9:05 The chain rule on our example
16:00 Linear regression
18:00 Gradient descent with numpy...
27:30 ... with pytorch tensors
31:30 Using autograd
34:35 Using a neural network (linear layer)
39:50 Using a pytorch optimizer
44:00 algorithm: how automatic differentiation works

Slides and Notebook

Quiz

To check your understanding of automatic differentiation, you can do the quizzes

Practicals

Challenge

Adapt your code to solve the following challenge:

Some small modifications:

  • First modification: we now generate points (x_t,y_t) where y_t = \exp(w^*\cos(x_t)+b^*), i.e. y_t is obtained by applying a deterministic function to x_t with parameters w^* and b^*. Our goal is still to recover the parameters w^* and b^* from the observations (x_t,y_t).

  • Second modification: we now generate points (x_t,y_t) where y_t = \exp(w^*\cos(p^* x_t)+b^*), i.e. y_t is obtained by applying a deterministic function to x_t with parameters p^*, w^* and b^*. Our goal is still to recover the parameters from the observations (x_t,y_t). A minimal fitting sketch is given below.
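To get started on the first modification, here is a minimal PyTorch sketch (the values of w^* and b^* below are made up for the illustration):

import torch

w_star, b_star = 0.5, -1.0                      # hypothetical ground-truth parameters
x = torch.linspace(-3, 3, 100)
y = torch.exp(w_star * torch.cos(x) + b_star)   # deterministic observations

w = torch.randn(1, requires_grad=True)
b = torch.randn(1, requires_grad=True)
optimizer = torch.optim.Adam([w, b], lr=0.05)

for _ in range(2000):
    optimizer.zero_grad()
    loss = ((torch.exp(w * torch.cos(x) + b) - y) ** 2).mean()
    loss.backward()
    optimizer.step()

print(w.item(), b.item())                       # should end up close to (w_star, b_star)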

Bonus:

\ No newline at end of file diff --git a/modules/2c-jax/index.html b/modules/2c-jax/index.html index 548c42a..39c3632 100644 --- a/modules/2c-jax/index.html +++ b/modules/2c-jax/index.html @@ -1,4 +1,4 @@ - Dataflowr - Deep Learning DIY

Module 2c - Automatic differentiation: VJP and intro to JAX

Table of Contents

Autodiff and Backpropagation

Jacobian

Let f:RnRm\mathbf{f}:\mathbb{R}^n\to \mathbb{R}^m, we define its Jacobian as:

\begin{aligned} \frac{\partial \mathbf{f}}{\partial \mathbf{x}} = J_{\mathbf{f}}(\mathbf{x}) &= \left( \begin{array}{ccc} \frac{\partial f_1}{\partial x_1}&\dots& \frac{\partial f_1}{\partial x_n}\\ \vdots&&\vdots\\ \frac{\partial f_m}{\partial x_1}&\dots& \frac{\partial f_m}{\partial x_n} \end{array}\right)\\ &=\left( \frac{\partial \mathbf{f}}{\partial x_1},\dots, \frac{\partial \mathbf{f}}{\partial x_n}\right)\\ &=\left( \begin{array}{c} \nabla f_1(\mathbf{x})^T\\ \vdots\\ \nabla f_m(\mathbf{x})^T \end{array}\right) \end{aligned}

Hence the Jacobian Jf(x)Rm×nJ_{\mathbf{f}}(\mathbf{x})\in \mathbb{R}^{m\times n} is a linear map from Rn\mathbb{R}^n to Rm\mathbb{R}^m such that for x,vRn\mathbf{x},\mathbf{v} \in \mathbb{R}^n and hRh\in \mathbb{R}:

\begin{aligned} \mathbf{f}(\mathbf{x}+h\mathbf{v}) = \mathbf{f}(\mathbf{x}) + h J_{\mathbf{f}}(\mathbf{x})\mathbf{v} +o(h). \end{aligned}

The term Jf(x)vRmJ_{\mathbf{f}}(\mathbf{x})\mathbf{v}\in \mathbb{R}^m is a Jacobian Vector Product (JVP), corresponding to the interpretation where the Jacobian is the linear map: Jf(x):RnRmJ_{\mathbf{f}}(\mathbf{x}):\mathbb{R}^n \to \mathbb{R}^m, where Jf(x)(v)=Jf(x)vJ_{\mathbf{f}}(\mathbf{x})(\mathbf{v})=J_{\mathbf{f}}(\mathbf{x})\mathbf{v}.

Chain composition

In machine learning, we compute the gradient of the loss function with respect to the parameters. Even if the parameters are high-dimensional, the loss itself is a real number. Hence, consider a real-valued composition \mathbf{f}:\mathbb{R}^n\stackrel{\mathbf{g}_1}{\to}\mathbb{R}^m \stackrel{\mathbf{g}_2}{\to}\mathbb{R}^d\stackrel{h}{\to}\mathbb{R}, so that \mathbf{f}(\mathbf{x}) = h(\mathbf{g}_2(\mathbf{g}_1(\mathbf{x})))\in \mathbb{R}. We have

\begin{aligned} \underbrace{\nabla\mathbf{f}(\mathbf{x})}_{n\times 1}=\underbrace{J_{\mathbf{g}_1}(\mathbf{x})^T}_{n\times m}\underbrace{J_{\mathbf{g}_2}(\mathbf{g}_1(\mathbf{x}))^T}_{m\times d}\underbrace{\nabla h(\mathbf{g}_2(\mathbf{g}_1(\mathbf{x})))}_{d\times 1}. \end{aligned}

To do this computation from the right, we first compute a matrix-vector product giving a vector of size m, and then another matrix-vector product, for a total of O(nm+md) operations. If we start from the left with a matrix-matrix multiplication, we get O(nmd+nd) operations. Hence, as soon as the dimensions m and d are not too small, starting from the right is much more efficient. Note however that doing the computation from right to left requires keeping in memory the values of \mathbf{g}_1(\mathbf{x})\in\mathbb{R}^m and \mathbf{x}\in \mathbb{R}^n.

Backpropagation is an efficient algorithm computing the gradient "from the right to the left", i.e. backward. In particular, we will need to compute quantities of the form J_{\mathbf{f}}(\mathbf{x})^T\mathbf{u} \in \mathbb{R}^n with \mathbf{u} \in\mathbb{R}^m, which can be rewritten as \mathbf{u}^T J_{\mathbf{f}}(\mathbf{x}): a Vector Jacobian Product (VJP), corresponding to the interpretation where the Jacobian is the linear map J_{\mathbf{f}}(\mathbf{x}):\mathbb{R}^n \to \mathbb{R}^m composed with the linear map \mathbf{u}:\mathbb{R}^m\to \mathbb{R}, so that \mathbf{u}^TJ_{\mathbf{f}}(\mathbf{x}) = \mathbf{u} \circ J_{\mathbf{f}}(\mathbf{x}).

example: let f(x,W)=xWRb\mathbf{f}(\mathbf{x}, W) = \mathbf{x} W\in \mathbb{R}^b where WRa×bW\in \mathbb{R}^{a\times b} and xRa\mathbf{x}\in \mathbb{R}^a. We clearly have

Jf(x)=WT. J_{\mathbf{f}}(\mathbf{x}) = W^T.

Note that here, we are slightly abusing notations and considering the partial function xf(x,W)\mathbf{x}\mapsto \mathbf{f}(\mathbf{x}, W). To see this, we can write fj=ixiWijf_j = \sum_{i}x_iW_{ij} so that

fxi=(Wi1Wib)T \frac{\partial \mathbf{f}}{\partial x_i}= \left( W_{i1}\dots W_{ib}\right)^T

Then recall from definitions that

Jf(x)=(fx1,,fxn)=WT. J_{\mathbf{f}}(\mathbf{x}) = \left( \frac{\partial \mathbf{f}}{\partial x_1},\dots, \frac{\partial \mathbf{f}}{\partial x_n}\right)=W^T.

Now we clearly have

J_{\mathbf{f}}(W) = \mathbf{x} \text{ since } \mathbf{f}(\mathbf{x}, W+\Delta W) = \mathbf{f}(\mathbf{x}, W) + \mathbf{x} \Delta W.

Note that multiplying x\mathbf{x} on the left is actually convenient when using broadcasting, i.e. we can take a batch of input vectors of shape bs×a\text{bs}\times a without modifying the math above.

Implementation

In PyTorch, torch.autograd provides classes and functions implementing automatic differentiation of arbitrary scalar-valued functions. To create a custom autograd.Function, subclass this class and implement the forward() and backward() static methods. Here is an example:

class Exp(Function):
+           Dataflowr - Deep Learning DIY  

Module 2c - Automatic differentiation: VJP and intro to JAX

Table of Contents

Autodiff and Backpropagation

Jacobian

Let f:RnRm\mathbf{f}:\mathbb{R}^n\to \mathbb{R}^m, we define its Jacobian as:

\begin{aligned} \frac{\partial \mathbf{f}}{\partial \mathbf{x}} = J_{\mathbf{f}}(\mathbf{x}) &= \left( \begin{array}{ccc} \frac{\partial f_1}{\partial x_1}&\dots& \frac{\partial f_1}{\partial x_n}\\ \vdots&&\vdots\\ \frac{\partial f_m}{\partial x_1}&\dots& \frac{\partial f_m}{\partial x_n} \end{array}\right)\\ &=\left( \frac{\partial \mathbf{f}}{\partial x_1},\dots, \frac{\partial \mathbf{f}}{\partial x_n}\right)\\ &=\left( \begin{array}{c} \nabla f_1(\mathbf{x})^T\\ \vdots\\ \nabla f_m(\mathbf{x})^T \end{array}\right) \end{aligned}

Hence the Jacobian Jf(x)Rm×nJ_{\mathbf{f}}(\mathbf{x})\in \mathbb{R}^{m\times n} is a linear map from Rn\mathbb{R}^n to Rm\mathbb{R}^m such that for x,vRn\mathbf{x},\mathbf{v} \in \mathbb{R}^n and hRh\in \mathbb{R}:

\begin{aligned} \mathbf{f}(\mathbf{x}+h\mathbf{v}) = \mathbf{f}(\mathbf{x}) + h J_{\mathbf{f}}(\mathbf{x})\mathbf{v} +o(h). \end{aligned}

The term Jf(x)vRmJ_{\mathbf{f}}(\mathbf{x})\mathbf{v}\in \mathbb{R}^m is a Jacobian Vector Product (JVP), corresponding to the interpretation where the Jacobian is the linear map: Jf(x):RnRmJ_{\mathbf{f}}(\mathbf{x}):\mathbb{R}^n \to \mathbb{R}^m, where Jf(x)(v)=Jf(x)vJ_{\mathbf{f}}(\mathbf{x})(\mathbf{v})=J_{\mathbf{f}}(\mathbf{x})\mathbf{v}.

Chain composition

In machine learning, we compute the gradient of the loss function with respect to the parameters. Even if the parameters are high-dimensional, the loss itself is a real number. Hence, consider a real-valued composition \mathbf{f}:\mathbb{R}^n\stackrel{\mathbf{g}_1}{\to}\mathbb{R}^m \stackrel{\mathbf{g}_2}{\to}\mathbb{R}^d\stackrel{h}{\to}\mathbb{R}, so that \mathbf{f}(\mathbf{x}) = h(\mathbf{g}_2(\mathbf{g}_1(\mathbf{x})))\in \mathbb{R}. We have

\begin{aligned} \underbrace{\nabla\mathbf{f}(\mathbf{x})}_{n\times 1}=\underbrace{J_{\mathbf{g}_1}(\mathbf{x})^T}_{n\times m}\underbrace{J_{\mathbf{g}_2}(\mathbf{g}_1(\mathbf{x}))^T}_{m\times d}\underbrace{\nabla h(\mathbf{g}_2(\mathbf{g}_1(\mathbf{x})))}_{d\times 1}. \end{aligned}

To do this computation from the right, we first compute a matrix-vector product giving a vector of size m, and then another matrix-vector product, for a total of O(nm+md) operations. If we start from the left with a matrix-matrix multiplication, we get O(nmd+nd) operations. Hence, as soon as the dimensions m and d are not too small, starting from the right is much more efficient. Note however that doing the computation from right to left requires keeping in memory the values of \mathbf{g}_1(\mathbf{x})\in\mathbb{R}^m and \mathbf{x}\in \mathbb{R}^n.

Backpropagation is an efficient algorithm computing the gradient "from the right to the left", i.e. backward. In particular, we will need to compute quantities of the form J_{\mathbf{f}}(\mathbf{x})^T\mathbf{u} \in \mathbb{R}^n with \mathbf{u} \in\mathbb{R}^m, which can be rewritten as \mathbf{u}^T J_{\mathbf{f}}(\mathbf{x}): a Vector Jacobian Product (VJP), corresponding to the interpretation where the Jacobian is the linear map J_{\mathbf{f}}(\mathbf{x}):\mathbb{R}^n \to \mathbb{R}^m composed with the linear map \mathbf{u}:\mathbb{R}^m\to \mathbb{R}, so that \mathbf{u}^TJ_{\mathbf{f}}(\mathbf{x}) = \mathbf{u} \circ J_{\mathbf{f}}(\mathbf{x}).

example: let f(x,W)=xWRb\mathbf{f}(\mathbf{x}, W) = \mathbf{x} W\in \mathbb{R}^b where WRa×bW\in \mathbb{R}^{a\times b} and xRa\mathbf{x}\in \mathbb{R}^a. We clearly have

Jf(x)=WT. J_{\mathbf{f}}(\mathbf{x}) = W^T.

Note that here, we are slightly abusing notations and considering the partial function xf(x,W)\mathbf{x}\mapsto \mathbf{f}(\mathbf{x}, W). To see this, we can write fj=ixiWijf_j = \sum_{i}x_iW_{ij} so that

fxi=(Wi1Wib)T \frac{\partial \mathbf{f}}{\partial x_i}= \left( W_{i1}\dots W_{ib}\right)^T

Then recall from definitions that

Jf(x)=(fx1,,fxn)=WT. J_{\mathbf{f}}(\mathbf{x}) = \left( \frac{\partial \mathbf{f}}{\partial x_1},\dots, \frac{\partial \mathbf{f}}{\partial x_n}\right)=W^T.

Now we clearly have

J_{\mathbf{f}}(W) = \mathbf{x} \text{ since } \mathbf{f}(\mathbf{x}, W+\Delta W) = \mathbf{f}(\mathbf{x}, W) + \mathbf{x} \Delta W.

Note that multiplying x\mathbf{x} on the left is actually convenient when using broadcasting, i.e. we can take a batch of input vectors of shape bs×a\text{bs}\times a without modifying the math above.
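A quick numerical check of this example with torch.autograd.grad, which computes exactly such a VJP (the shapes a and b are arbitrary):

import torch

a, b = 3, 4
x = torch.randn(a, requires_grad=True)
W = torch.randn(a, b)
u = torch.randn(b)

y = x @ W                                        # f(x, W) = xW, a vector of size b
g, = torch.autograd.grad(y, x, grad_outputs=u)   # computes J_f(x)^T u
assert torch.allclose(g, W @ u)                  # indeed J_f(x)^T u = W u since J_f(x) = W^T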

Implementation

In PyTorch, torch.autograd provides classes and functions implementing automatic differentiation of arbitrary scalar-valued functions. To create a custom autograd.Function, subclass this class and implement the forward() and backward() static methods. Here is an example:

class Exp(Function):
     @staticmethod
     def forward(ctx, i):
         result = i.exp()
@@ -9,4 +9,4 @@
         result, = ctx.saved_tensors
         return grad_output * result
 # Use it by calling the apply method:
-output = Exp.apply(input)

You can have a look at Module 2b to learn more about this approach as well as MLP from scratch.

Backprop the functional way

Here we will implement in numpy a different approach, mimicking the functional approach of JAX (see The Autodiff Cookbook).

Each function will take 2 arguments: one being the input x and the other being the parameters w. For each function, we build 2 vjp functions taking as argument a gradient u\mathbf{u}, and corresponding to Jf(x)J_{\mathbf{f}}(\mathbf{x}) and Jf(w)J_{\mathbf{f}}(\mathbf{w}) so that these functions return Jf(x)TuJ_{\mathbf{f}}(\mathbf{x})^T \mathbf{u} and Jf(w)TuJ_{\mathbf{f}}(\mathbf{w})^T \mathbf{u} respectively. To summarize, for xRn\mathbf{x} \in \mathbb{R}^n, wRd\mathbf{w} \in \mathbb{R}^d, and, f(x,w)Rm\mathbf{f}(\mathbf{x},\mathbf{w}) \in \mathbb{R}^m,

\begin{aligned} {\bf vjp}_\mathbf{x}(\mathbf{u}) &= J_{\mathbf{f}}(\mathbf{x})^T \mathbf{u}, \text{ with } J_{\mathbf{f}}(\mathbf{x})\in\mathbb{R}^{m\times n}, \mathbf{u}\in \mathbb{R}^m\\ {\bf vjp}_\mathbf{w}(\mathbf{u}) &= J_{\mathbf{f}}(\mathbf{w})^T \mathbf{u}, \text{ with } J_{\mathbf{f}}(\mathbf{w})\in\mathbb{R}^{m\times d}, \mathbf{u}\in \mathbb{R}^m \end{aligned}

Then backpropagation is simply done by first computing the gradient of the loss and then composing the vjp functions in the right order.

Practice

\ No newline at end of file +output = Exp.apply(input)
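For reference, a complete runnable version of this custom Function, following the standard pattern of the PyTorch documentation (the missing lines are reconstructed here):

import torch
from torch.autograd import Function

class Exp(Function):
    @staticmethod
    def forward(ctx, i):
        result = i.exp()
        ctx.save_for_backward(result)   # keep the forward result for the backward pass
        return result

    @staticmethod
    def backward(ctx, grad_output):
        result, = ctx.saved_tensors
        return grad_output * result     # d exp(i)/di = exp(i)

# Use it by calling the apply method:
input = torch.randn(3, requires_grad=True)
output = Exp.apply(input)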

You can have a look at Module 2b to learn more about this approach as well as MLP from scratch.

Backprop the functional way

Here we will implement in numpy a different approach, mimicking the functional approach of JAX (see The Autodiff Cookbook).

Each function will take 2 arguments: one being the input x and the other being the parameters w. For each function, we build 2 vjp functions taking as argument a gradient u\mathbf{u}, and corresponding to Jf(x)J_{\mathbf{f}}(\mathbf{x}) and Jf(w)J_{\mathbf{f}}(\mathbf{w}) so that these functions return Jf(x)TuJ_{\mathbf{f}}(\mathbf{x})^T \mathbf{u} and Jf(w)TuJ_{\mathbf{f}}(\mathbf{w})^T \mathbf{u} respectively. To summarize, for xRn\mathbf{x} \in \mathbb{R}^n, wRd\mathbf{w} \in \mathbb{R}^d, and, f(x,w)Rm\mathbf{f}(\mathbf{x},\mathbf{w}) \in \mathbb{R}^m,

\begin{aligned} {\bf vjp}_\mathbf{x}(\mathbf{u}) &= J_{\mathbf{f}}(\mathbf{x})^T \mathbf{u}, \text{ with } J_{\mathbf{f}}(\mathbf{x})\in\mathbb{R}^{m\times n}, \mathbf{u}\in \mathbb{R}^m\\ {\bf vjp}_\mathbf{w}(\mathbf{u}) &= J_{\mathbf{f}}(\mathbf{w})^T \mathbf{u}, \text{ with } J_{\mathbf{f}}(\mathbf{w})\in\mathbb{R}^{m\times d}, \mathbf{u}\in \mathbb{R}^m \end{aligned}

Then backpropagation is simply done by first computing the gradient of the loss and then composing the vjp functions in the right order.
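For instance, for the linear layer f(x, w) = xw of the example above, the two vjp functions can be written in a few lines of numpy, together with a directional-derivative check (shapes are arbitrary; this is only a sketch of the functional approach):

import numpy as np

def linear(x, w):
    return x @ w                                  # f(x, w) = xw in R^m, with x in R^n, w in R^{n x m}

def vjp_x(x, w, u):
    return w @ u                                  # J_f(x)^T u = w u

def vjp_w(x, w, u):
    return np.outer(x, u)                         # (J_f(w)^T u)_{ij} = x_i u_j

n, m = 3, 4
x, w, u, v = np.random.randn(n), np.random.randn(n, m), np.random.randn(m), np.random.randn(n)
eps = 1e-6
lhs = (linear(x + eps * v, w) - linear(x, w)) @ u / eps   # approximates u^T J_f(x) v
assert np.allclose(lhs, vjp_x(x, w, u) @ v, atol=1e-4)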

Practice

\ No newline at end of file diff --git a/modules/3-loss-functions-for-classification/index.html b/modules/3-loss-functions-for-classification/index.html index 8ad19f3..906d58d 100644 --- a/modules/3-loss-functions-for-classification/index.html +++ b/modules/3-loss-functions-for-classification/index.html @@ -1,4 +1,4 @@ - Dataflowr - Deep Learning DIY

Module 3 - Loss functions for classification

Table of Contents

Loss functions for classification


0:00 Recap
2:25 How to choose your loss?
3:18 A probabilistic model for linear regression
7:50 Gradient descent, learning rate, SGD
11:30 Pytorch code for gradient descent
15:15 A probabilistic model for logistic regression
17:27 Notations (information theory)
20:58 Likelihood for logistic regression
22:43 BCELoss
23:41 BCEWithLogitsLoss
25:37 Beware of the reduction parameter
27:27 Softmax regression
30:52 NLLLoss
34:48 Classification in pytorch
36:36 Why maximizing accuracy directly is hard?
38:24 Classification in deep learning
40:50 Regression without knowing the underlying model
42:58 Overfitting in polynomial regression
45:20 Validation set
48:55 Notion of risk and hypothesis space
54:40 estimation error and approximation error

Slides and Notebook

  • slides

  • notebook in colab An explanation of underfitting and overfitting with polynomial regression.

Minimal working examples

BCELoss

import torch.nn as nn
+          Dataflowr - Deep Learning DIY  

Module 3 - Loss functions for classification

Table of Contents

Loss functions for classification


0:00 Recap
2:25 How to choose your loss?
3:18 A probabilistic model for linear regression
7:50 Gradient descent, learning rate, SGD
11:30 Pytorch code for gradient descent
15:15 A probabilistic model for logistic regression
17:27 Notations (information theory)
20:58 Likelihood for logistic regression
22:43 BCELoss
23:41 BCEWithLogitsLoss
25:37 Beware of the reduction parameter
27:27 Softmax regression
30:52 NLLLoss
34:48 Classification in pytorch
36:36 Why maximizing accuracy directly is hard?
38:24 Classification in deep learning
40:50 Regression without knowing the underlying model
42:58 Overfitting in polynomial regression
45:20 Validation set
48:55 Notion of risk and hypothesis space
54:40 estimation error and approximation error

Slides and Notebook

  • slides

  • notebook in colab An explanation of underfitting and overfitting with polynomial regression.

Minimal working examples

BCELoss

import torch
import torch.nn as nn
 m = nn.Sigmoid()
 loss = nn.BCELoss()
 input = torch.randn(3,4,5)
@@ -10,4 +10,4 @@
 C = 8
 input = torch.randn(3,C,4,5)
 target = torch.empty(3,4,5, dtype=torch.long).random_(0,C)
-assert loss1(m(input),target) == loss2(input,target)

Quiz

To check you know your loss, you can do the quizzes

\ No newline at end of file +assert loss1(m(input),target) == loss2(input,target)
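For reference, a complete, self-contained variant of the second example (a reconstruction assuming the missing lines define m, loss1 and loss2 as below), checking that LogSoftmax followed by NLLLoss matches CrossEntropyLoss:

import torch
import torch.nn as nn

m = nn.LogSoftmax(dim=1)
loss1 = nn.NLLLoss()
loss2 = nn.CrossEntropyLoss()
C = 8
input = torch.randn(3, C, 4, 5)
target = torch.empty(3, 4, 5, dtype=torch.long).random_(0, C)
# CrossEntropyLoss combines LogSoftmax and NLLLoss; allclose is safer than == for floats
assert torch.allclose(loss1(m(input), target), loss2(input, target))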

Quiz

To check you know your loss, you can do the quizzes

\ No newline at end of file diff --git a/modules/4-optimization-for-deep-learning/index.html b/modules/4-optimization-for-deep-learning/index.html index 69a5a94..45e303e 100644 --- a/modules/4-optimization-for-deep-learning/index.html +++ b/modules/4-optimization-for-deep-learning/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 4 - Optimization for deep learning

Table of Contents

Optimization for deep learning


0:00 Recap
0:31 Plan
1:14 Optimization in deep learning
3:44 Gradient descent variants
7:58 Setting for the jupyter notebook
9:49 Vanilla gradient descent
12:14 Momentum
15:38 Nesterov accelerated gradient descent
18:00 Adagrad
20:06 RMSProp
22:11 Adam
24:39 AMSGrad
27:09 Pytorch optimizers

Slides and Practicals

References

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 4 - Optimization for deep learning

Table of Contents

Optimization for deep learning


0:00 Recap
0:31 Plan
1:14 Optimization in deep learning
3:44 Gradient descent variants
7:58 Setting for the jupyter notebook
9:49 Vanilla gradient descent
12:14 Momentum
15:38 Nesterov accelerated gradient descent
18:00 Adagrad
20:06 RMSProp
22:11 Adam
24:39 AMSGrad
27:09 Pytorch optimizers
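To illustrate the last item above, switching between the optimizers covered in the video only changes one line of a standard PyTorch training loop; a minimal sketch (the model, data and learning rates are illustrative):

import torch

model = torch.nn.Linear(10, 1)
x, y = torch.randn(64, 10), torch.randn(64, 1)

for opt in [torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9),
            torch.optim.RMSprop(model.parameters(), lr=0.01),
            torch.optim.Adam(model.parameters(), lr=0.01)]:
    for _ in range(100):
        opt.zero_grad()                    # reset the gradients accumulated in .grad
        loss = torch.nn.functional.mse_loss(model(x), y)
        loss.backward()                    # backpropagation
        opt.step()                         # parameter update specific to the chosen optimizer
    print(type(opt).__name__, loss.item())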

Slides and Practicals

References

\ No newline at end of file diff --git a/modules/5-stacking-layers/index.html b/modules/5-stacking-layers/index.html index 9c7819b..3497e17 100644 --- a/modules/5-stacking-layers/index.html +++ b/modules/5-stacking-layers/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 5 - Stacking layers

Table of Contents

Stacking layers


0:00 Recap
1:35 Plan of the lesson: define a NN model
2:24 MLP with pytorch Sequential
6:41 Using Torch.nn.module
10:08 Writing a pytorch module

Slides

Practicals

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 5 - Stacking layers

Table of Contents

Stacking layers


0:00 Recap
1:35 Plan of the lesson: define a NN model
2:24 MLP with pytorch Sequential
6:41 Using Torch.nn.module
10:08 Writing a pytorch module

Slides

Practicals

\ No newline at end of file diff --git a/modules/6-convolutional-neural-network/index.html b/modules/6-convolutional-neural-network/index.html index 45686de..1caf3b7 100644 --- a/modules/6-convolutional-neural-network/index.html +++ b/modules/6-convolutional-neural-network/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 6 - Convolutional neural network

Table of Contents

Convolutional neural network


0:00 Recap
0:52 MNIST dataset
2:56 A simple binary classifier
6:21 Precision and recall
8:44 Filters and convolutions
19:40 Max pooling

Notebook

Practicals


28:24 Practicals: your first CNN

Post

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 6 - Convolutional neural network

Table of Contents

Convolutional neural network


0:00 Recap
0:52 MNIST dataset
2:56 A simple binary classifier
6:21 Precision and recall
8:44 Filters and convolutions
19:40 Max pooling

Notebook

Practicals


28:24 Practicals: your first CNN

Post

\ No newline at end of file diff --git a/modules/7-dataloading/index.html b/modules/7-dataloading/index.html index a2022ae..27582e3 100644 --- a/modules/7-dataloading/index.html +++ b/modules/7-dataloading/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 7 - Dataloading

Table of Contents

Dataloading


0:00 Recap
1:09 Plan of the lesson
2:08 Dataloading
4:40 Example 1: torchvision.datasets.Imagefolder
9:45 Example 2: dataset from numpy arrays
14:47 Example 3: custom dataloader

Slides

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 7 - Dataloading

Table of Contents

Dataloading


0:00 Recap
1:09 Plan of the lesson
2:08 Dataloading
4:40 Example 1: torchvision.datasets.Imagefolder
9:45 Example 2: dataset from numpy arrays
14:47 Example 3: custom dataloader

Slides

\ No newline at end of file diff --git a/modules/8a-embedding-layers/index.html b/modules/8a-embedding-layers/index.html index b195fd3..619e9fb 100644 --- a/modules/8a-embedding-layers/index.html +++ b/modules/8a-embedding-layers/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 8a - Embedding layers

Table of Contents

Embedding layers


17:46 Dealing with symbolic data
18:31 One-hot encoding
22:46 Embeddings
27:40 Pytorch sparse layer

Slides

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 8a - Embedding layers

Table of Contents

Embedding layers


17:46 Dealing with symbolic data
18:31 One-hot encoding
22:46 Embeddings
27:40 Pytorch sparse layer

Slides

\ No newline at end of file diff --git a/modules/8b-collaborative-filtering/index.html b/modules/8b-collaborative-filtering/index.html index da0a45e..ea87c22 100644 --- a/modules/8b-collaborative-filtering/index.html +++ b/modules/8b-collaborative-filtering/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 8b - Collaborative filtering

Table of Contents

Collaborative filtering


0:00 Collaborative filtering
6:50 Movielens dataset: data wrangling with pandas
11:36 Test/train split with sklearn
13:51 The dot model neural network
19:03 Checking your model
21:19 Coding the training loop
21:49 Checking your training loop
23:27 FactorizationModel: a deep learning framework
27:36 Checking your FactorizationModel
30:55 Sorting the movies
33:00 PCA of movies embeddings
36:40 SPOTLIGHT lib

Notebook

Practicals


13:51 Start with your implementation of the dot model
\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 8b - Collaborative filtering

Table of Contents

Collaborative filtering


0:00 Collaborative filtering
6:50 Movielens dataset: data wrangling with pandas
11:36 Test/train split with sklearn
13:51 The dot model neural network
19:03 Checking your model
21:19 Coding the training loop
21:49 Checking your training loop
23:27 FactorizationModel: a deep learning framework
27:36 Checking your FactorizationModel
30:55 Sorting the movies
33:00 PCA of movies embeddings
36:40 SPOTLIGHT lib
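As a hint for the dot model listed above, a minimal sketch (names and embedding size are illustrative): the predicted rating is the dot product between a user embedding and an item embedding.

import torch
import torch.nn as nn

class DotModel(nn.Module):
    def __init__(self, n_users, n_items, dim=50):
        super().__init__()
        self.user = nn.Embedding(n_users, dim)
        self.item = nn.Embedding(n_items, dim)

    def forward(self, users, items):
        # users, items: LongTensors of indices; returns one predicted rating per pair
        return (self.user(users) * self.item(items)).sum(dim=-1)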

Notebook

Practicals


13:51 Start with your implementation of the dot model
\ No newline at end of file diff --git a/modules/8c-word2vec/index.html b/modules/8c-word2vec/index.html index 19c6e01..dd54a2b 100644 --- a/modules/8c-word2vec/index.html +++ b/modules/8c-word2vec/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY \ No newline at end of file + Dataflowr - Deep Learning DIY \ No newline at end of file diff --git a/modules/9a-autoencoders/index.html b/modules/9a-autoencoders/index.html index 3baae1e..11e3b25 100644 --- a/modules/9a-autoencoders/index.html +++ b/modules/9a-autoencoders/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 9a - Autoencoders

Table of Contents

Autoencoders


0:00 Recap and unsupervised learning
2:19 Plan
3:09 Theory of autoencoders
7:29 Practice of autoencoders in PyTorch
11:19 Representation learning with autoencoders
15:55 Practicals
16:49 A simple autoencoder
20:10 Stacked autoencoders
22:16 Interpolation
22:29 Denoising autoencoder

Slides

Practicals

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 9a - Autoencoders

Table of Contents

Autoencoders


0:00 Recap and unsupervised learning
2:19 Plan
3:09 Theory of autoencoders
7:29 Practice of autoencoders in PyTorch
11:19 Representation learning with autoencoders
15:55 Practicals
16:49 A simple autoencoder
20:10 Stacked autoencoders
22:16 Interpolation
22:29 Denoising autoencoder

Slides

Practicals

\ No newline at end of file diff --git a/modules/9b-unet/index.html b/modules/9b-unet/index.html index fdd26ca..262aafa 100644 --- a/modules/9b-unet/index.html +++ b/modules/9b-unet/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 9b - UNets

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 9b - UNets

\ No newline at end of file diff --git a/modules/9c-flows/index.html b/modules/9c-flows/index.html index aa148e9..fee7a00 100644 --- a/modules/9c-flows/index.html +++ b/modules/9c-flows/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module 9c - Flows

Table of Contents

Normalizing flows

The image below is taken from this very good blog post on normalizing flows: blogpost

Here we only describe flow-based generative models; you can also have a look at VAE and GAN.

A flow-based generative model is constructed by a sequence of invertible transformations. The main advantage of flows is that the model explicitly learns the data distribution \(p(\mathbf{x})\) and therefore the loss function is simply the negative log-likelihood.

Given a sample \(\mathbf{x}\) and a prior \(p(\mathbf{z})\), we compute \(f(\mathbf{x}) = \mathbf{z}\) with an invertible function \(f\) that will be learned. Given \(f\) and the prior \(p(\mathbf{z})\), we can compute the evidence \(p(\mathbf{x})\) thanks to the change of variable formula:

\[\begin{aligned} \mathbf{z} &\sim p(\mathbf{z}), \mathbf{z} = f(\mathbf{x}), \\ p(\mathbf{x}) &= p(\mathbf{z}) \left\vert \det \dfrac{d \mathbf{z}}{d \mathbf{x}} \right\vert = p(f(\mathbf{x})) \left\vert \det \dfrac{\partial f(\mathbf{x})}{\partial \mathbf{x}} \right\vert \end{aligned}\]

where \(\dfrac{\partial f(\mathbf{x})}{\partial \mathbf{x}}\) is the Jacobian matrix of \(f\). Recall that given a function mapping a \(n\)-dimensional input vector \(\mathbf{x}\) to a \(m\)-dimensional output vector, \(f: \mathbb{R}^n \mapsto \mathbb{R}^m\), the matrix of all first-order partial derivatives of this function is called the Jacobian matrix, \(J_f\) where one entry on the i-th row and j-th column is \((J_f(\mathbf{x}))_{ij} = \frac{\partial f_i(\mathbf{x})}{\partial x_j}\):

\[\begin{aligned} {J_f(\mathbf{x})} = \begin{bmatrix} \frac{\partial f_1(\mathbf{x})}{\partial x_1} & \dots & \frac{\partial f_1(\mathbf{x})}{\partial x_n} \\[6pt] \vdots & \ddots & \vdots \\[6pt] \frac{\partial f_m(\mathbf{x})}{\partial x_1} & \dots & \frac{\partial f_m(\mathbf{x})}{\partial x_n} \\[6pt] \end{bmatrix} \end{aligned}\]

Below, we will parametrize \(f\) with a neural network and learn \(f\) by maximizing \(\ln p(\mathbf{x})\). More precisely, given a dataset \((\mathbf{x}_1,\dots,\mathbf{x}_n)\) and a model provided by a prior \(p(\mathbf{z})\) and a neural network \(f\), we optimize the weights of \(f\) by minimizing:

\[\begin{aligned} -\sum_{i}\ln p(\mathbf{x_i}) = \sum_i -\ln p(f(\mathbf{x}_i)) -\ln\left\vert \det \dfrac{\partial f(\mathbf{x}_i)}{\partial \mathbf{x}} \right\vert. \end{aligned}\]

We need to ensure that \(f\) is always invertible and that the determinant is simple to compute.

Density estimation using Real NVP

Real NVP (introduced by Laurent Dinh, Jascha Sohl-Dickstein, Samy Bengio in 2016) uses function \(f\) obtained by stacking affine coupling layers which for an input \(\mathbf{x}\in \mathbb{R}^D\) produce the output \(\mathbf{y}\in\mathbb{R}^D\) defined by (with \( d < D \) ):

\[\begin{aligned} \mathbf{y}_{1:d} &= \mathbf{x}_{1:d}\\ \mathbf{y}_{d+1:D} &= \mathbf{x}_{d+1:D} \odot \exp\left(s(\mathbf{x}_{1:d})\right) +t(\mathbf{x}_{1:d}) , \end{aligned}\]

where \(s\) (scale) and \(t\) (translation) are neural networks mapping \(\mathbb{R}^d\) to \(\mathbb{R}^{D-d}\) and \(\odot\) is the element-wise product.

For any functions \(s\) and \(t\), the affine coupling layer is invertible:

\[\begin{aligned} \begin{cases} \mathbf{y}_{1:d} &= \mathbf{x}_{1:d} \\ \mathbf{y}_{d+1:D} &= \mathbf{x}_{d+1:D} \odot \exp({s(\mathbf{x}_{1:d})}) + t(\mathbf{x}_{1:d}) \end{cases} \Leftrightarrow \begin{cases} \mathbf{x}_{1:d} &= \mathbf{y}_{1:d} \\ \mathbf{x}_{d+1:D} &= (\mathbf{y}_{d+1:D} - t(\mathbf{y}_{1:d})) \odot \exp(-s(\mathbf{y}_{1:d})) \end{cases} \end{aligned}\]

The Jacobian of an affine coupling layer is a lower triangular matrix:

\[\begin{aligned} J(\mathbf{x}) = \frac{\partial \mathbf{y}}{\partial \mathbf{x}}= \begin{bmatrix} \mathbb{I}_d & \mathbf{0}_{d\times(D-d)} \\[5pt] \frac{\partial \mathbf{y}_{d+1:D}}{\partial \mathbf{x}_{1:d}} & \text{diag}(\exp(s(\mathbf{x}_{1:d}))) \end{bmatrix} \end{aligned}\]

Hence the determinant is simply the product of terms on the diagonal:

\[\begin{aligned} \left\vert\det(J(\mathbf{x}))\right\vert = \prod_{j=1}^{D-d}\exp(s(\mathbf{x}_{1:d}))_j = \exp\left(\sum_{j=1}^{D-d} s(\mathbf{x}_{1:d})_j\right) \end{aligned}\]

Note that, we do not need to compute the Jacobian of \(s\) or \(t\) and to compute \(f^{-1}\), we do not need to compute the inverse of \(s\) or \(t\) (which might not exist!). In other words, we can take arbitrary complex functions for \(s\) and \(t\).

In one affine coupling layer, some dimensions (channels) remain unchanged. To make sure all the inputs have a chance to be altered, the model reverses the ordering in each layer so that different components are left unchanged. Following such an alternating pattern, the set of units which remain identical in one transformation layer are always modified in the next.

This can be implemented with binary masks. First, we extend the scale and translation networks to mappings from \(\mathbb{R}^D\) to \(\mathbb{R}^D\). Then, taking a mask \(\mathbf{b} = (1,\dots,1,0,\dots,0)\) with \(d\) ones, the affine layer becomes:

\[\begin{aligned} \mathbf{y} = \mathbf{x} \odot \exp\big((1-\mathbf{b}) \odot s(\mathbf{b} \odot \mathbf{x})\big) + (1-\mathbf{b}) \odot t(\mathbf{b} \odot \mathbf{x}). \end{aligned}\]

Note that we have

\[\begin{aligned} \ln \left\vert\det(J(\mathbf{x}))\right\vert = \sum_{j=1}^{D} \Big((1-\mathbf{b})\odot s(\mathbf{b} \odot \mathbf{x})\Big)_j, \end{aligned}\]

and to invert the affine layer:

\[\begin{aligned} \mathbf{x} = \left( \mathbf{y} -(1-\mathbf{b}) \odot t(\mathbf{b} \odot \mathbf{y})\right)\odot \exp\left( -(1-\mathbf{b}) \odot s(\mathbf{b} \odot \mathbf{y})\right). \end{aligned}\]

Now we alternate the binary mask \(\mathbf{b}\) from one coupling layer to the next.

Note that the formula given in the paper is slightly different:

\[\mathbf{y} = \mathbf{b} \odot \mathbf{x} + (1 - \mathbf{b}) \odot \Big(\mathbf{x} \odot \exp\big(s(\mathbf{b} \odot \mathbf{x})\big) + t(\mathbf{b} \odot \mathbf{x})\Big),\]

but the 2 formulas give the same result!

Implementation of Real NVP

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module 9c - Flows

Table of Contents

Normalizing flows

The image below is taken from this very good blog post on normalizing flows: blogpost

Here we only describe flow-based generative models; you can also have a look at VAE and GAN.

A flow-based generative model is constructed by a sequence of invertible transformations. The main advantage of flows is that the model explicitly learns the data distribution \(p(\mathbf{x})\) and therefore the loss function is simply the negative log-likelihood.

Given a sample \(\mathbf{x}\) and a prior \(p(\mathbf{z})\), we compute \(f(\mathbf{x}) = \mathbf{z}\) with an invertible function \(f\) that will be learned. Given \(f\) and the prior \(p(\mathbf{z})\), we can compute the evidence \(p(\mathbf{x})\) thanks to the change of variable formula:

\[\begin{aligned} \mathbf{z} &\sim p(\mathbf{z}), \mathbf{z} = f(\mathbf{x}), \\ p(\mathbf{x}) &= p(\mathbf{z}) \left\vert \det \dfrac{d \mathbf{z}}{d \mathbf{x}} \right\vert = p(f(\mathbf{x})) \left\vert \det \dfrac{\partial f(\mathbf{x})}{\partial \mathbf{x}} \right\vert \end{aligned}\]

where \(\dfrac{\partial f(\mathbf{x})}{\partial \mathbf{x}}\) is the Jacobian matrix of \(f\). Recall that given a function mapping a \(n\)-dimensional input vector \(\mathbf{x}\) to a \(m\)-dimensional output vector, \(f: \mathbb{R}^n \mapsto \mathbb{R}^m\), the matrix of all first-order partial derivatives of this function is called the Jacobian matrix, \(J_f\) where one entry on the i-th row and j-th column is \((J_f(\mathbf{x}))_{ij} = \frac{\partial f_i(\mathbf{x})}{\partial x_j}\):

\[\begin{aligned} {J_f(\mathbf{x})} = \begin{bmatrix} \frac{\partial f_1(\mathbf{x})}{\partial x_1} & \dots & \frac{\partial f_1(\mathbf{x})}{\partial x_n} \\[6pt] \vdots & \ddots & \vdots \\[6pt] \frac{\partial f_m(\mathbf{x})}{\partial x_1} & \dots & \frac{\partial f_m(\mathbf{x})}{\partial x_n} \\[6pt] \end{bmatrix} \end{aligned}\]

Below, we will parametrize \(f\) with a neural network and learn \(f\) by maximizing \(\ln p(\mathbf{x})\). More precisely, given a dataset \((\mathbf{x}_1,\dots,\mathbf{x}_n)\) and a model provided by a prior \(p(\mathbf{z})\) and a neural network \(f\), we optimize the weights of \(f\) by minimizing:

\[\begin{aligned} -\sum_{i}\ln p(\mathbf{x_i}) = \sum_i -\ln p(f(\mathbf{x}_i)) -\ln\left\vert \det \dfrac{\partial f(\mathbf{x}_i)}{\partial \mathbf{x}} \right\vert. \end{aligned}\]

We need to ensure that \(f\) is always invertible and that the determinant is simple to compute.

Density estimation using Real NVP

Real NVP (introduced by Laurent Dinh, Jascha Sohl-Dickstein, Samy Bengio in 2016) uses function \(f\) obtained by stacking affine coupling layers which for an input \(\mathbf{x}\in \mathbb{R}^D\) produce the output \(\mathbf{y}\in\mathbb{R}^D\) defined by (with \( d < D \) ):

\[\begin{aligned} \mathbf{y}_{1:d} &= \mathbf{x}_{1:d}\\ \mathbf{y}_{d+1:D} &= \mathbf{x}_{d+1:D} \odot \exp\left(s(\mathbf{x}_{1:d})\right) +t(\mathbf{x}_{1:d}) , \end{aligned}\]

where \(s\) (scale) and \(t\) (translation) are neural networks mapping \(\mathbb{R}^d\) to \(\mathbb{R}^{D-d}\) and \(\odot\) is the element-wise product.

For any functions \(s\) and \(t\), the affine coupling layer is invertible:

\[\begin{aligned} \begin{cases} \mathbf{y}_{1:d} &= \mathbf{x}_{1:d} \\ \mathbf{y}_{d+1:D} &= \mathbf{x}_{d+1:D} \odot \exp({s(\mathbf{x}_{1:d})}) + t(\mathbf{x}_{1:d}) \end{cases} \Leftrightarrow \begin{cases} \mathbf{x}_{1:d} &= \mathbf{y}_{1:d} \\ \mathbf{x}_{d+1:D} &= (\mathbf{y}_{d+1:D} - t(\mathbf{y}_{1:d})) \odot \exp(-s(\mathbf{y}_{1:d})) \end{cases} \end{aligned}\]

The Jacobian of an affine coupling layer is a lower triangular matrix:

\[\begin{aligned} J(\mathbf{x}) = \frac{\partial \mathbf{y}}{\partial \mathbf{x}}= \begin{bmatrix} \mathbb{I}_d & \mathbf{0}_{d\times(D-d)} \\[5pt] \frac{\partial \mathbf{y}_{d+1:D}}{\partial \mathbf{x}_{1:d}} & \text{diag}(\exp(s(\mathbf{x}_{1:d}))) \end{bmatrix} \end{aligned}\]

Hence the determinant is simply the product of terms on the diagonal:

\[\begin{aligned} \left\vert\det(J(\mathbf{x}))\right\vert = \prod_{j=1}^{D-d}\exp(s(\mathbf{x}_{1:d}))_j = \exp\left(\sum_{j=1}^{D-d} s(\mathbf{x}_{1:d})_j\right) \end{aligned}\]

Note that, we do not need to compute the Jacobian of \(s\) or \(t\) and to compute \(f^{-1}\), we do not need to compute the inverse of \(s\) or \(t\) (which might not exist!). In other words, we can take arbitrary complex functions for \(s\) and \(t\).

In one affine coupling layer, some dimensions (channels) remain unchanged. To make sure all the inputs have a chance to be altered, the model reverses the ordering in each layer so that different components are left unchanged. Following such an alternating pattern, the set of units which remain identical in one transformation layer are always modified in the next.

This can be implemented with binary masks. First, we extend the scale and translation networks to mappings from \(\mathbb{R}^D\) to \(\mathbb{R}^D\). Then, taking a mask \(\mathbf{b} = (1,\dots,1,0,\dots,0)\) with \(d\) ones, the affine layer becomes:

\[\begin{aligned} \mathbf{y} = \mathbf{x} \odot \exp\big((1-\mathbf{b}) \odot s(\mathbf{b} \odot \mathbf{x})\big) + (1-\mathbf{b}) \odot t(\mathbf{b} \odot \mathbf{x}). \end{aligned}\]

Note that we have

\[\begin{aligned} \ln \left\vert\det(J(\mathbf{x}))\right\vert = \sum_{j=1}^{D} \Big((1-\mathbf{b})\odot s(\mathbf{b} \odot \mathbf{x})\Big)_j, \end{aligned}\]

and to invert the affine layer:

\[\begin{aligned} \mathbf{x} = \left( \mathbf{y} -(1-\mathbf{b}) \odot t(\mathbf{b} \odot \mathbf{y})\right)\odot \exp\left( -(1-\mathbf{b}) \odot s(\mathbf{b} \odot \mathbf{y})\right). \end{aligned}\]

Now we alternate the binary mask \(\mathbf{b}\) from one coupling layer to the next.

Note that the formula given in the paper is slightly different:

\[\mathbf{y} = \mathbf{b} \odot \mathbf{x} + (1 - \mathbf{b}) \odot \Big(\mathbf{x} \odot \exp\big(s(\mathbf{b} \odot \mathbf{x})\big) + t(\mathbf{b} \odot \mathbf{x})\Big),\]

but the 2 formulas give the same result!

Implementation of Real NVP
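Before looking at the full implementation, here is a minimal sketch of a single affine coupling layer following the masked formulation above (the networks \(s\) and \(t\), both mapping \(\mathbb{R}^D\) to \(\mathbb{R}^D\), and the binary mask \(\mathbf{b}\) are assumed given):

import torch
import torch.nn as nn

class AffineCoupling(nn.Module):
    def __init__(self, s, t, b):
        super().__init__()
        self.s, self.t = s, t             # arbitrary networks R^D -> R^D
        self.register_buffer("b", b)      # binary mask of shape (D,)

    def forward(self, x):
        # y = x * exp((1-b) * s(b*x)) + (1-b) * t(b*x), together with log|det J|
        s_out, t_out = self.s(self.b * x), self.t(self.b * x)
        y = x * torch.exp((1 - self.b) * s_out) + (1 - self.b) * t_out
        log_det = ((1 - self.b) * s_out).sum(dim=-1)
        return y, log_det

    def inverse(self, y):
        # x = (y - (1-b) * t(b*y)) * exp(-(1-b) * s(b*y))
        s_out, t_out = self.s(self.b * y), self.t(self.b * y)
        return (y - (1 - self.b) * t_out) * torch.exp(-(1 - self.b) * s_out)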

\ No newline at end of file diff --git a/modules/extras/Convolutions_first/index.html b/modules/extras/Convolutions_first/index.html index af7ec56..5f5e260 100644 --- a/modules/extras/Convolutions_first/index.html +++ b/modules/extras/Convolutions_first/index.html @@ -1,4 +1,4 @@ - Convolutions from first principles

Convolutions (and Discrete Fourier Transform) from first principles

author: Marc Lelarge, course: dataflowr, module: Convolutional neural network

date: June 8, 2021

Motivation

In the module on CNN, we presented the convolutional layers as learnable filters. In particular, we have seen that these layers have a particular form of weight sharing (only the parameters of the kernel need to be learned). The motivation for restricting our attention to this particular weight sharing comes from a long history in signal processing. Here, we would like to recover the intuition for convolutions from first principles.

So let's pretend, we do not know anything about signal processing and we would like to build from scratch a new neural network taking as input an image and producing as output another image. For example in semantic segmentation, each pixel in the input image is linked to a class as shown below (source: DeepLab): gif

Clearly in this case, when an object moves in the image, we want the associated labels to move with it. Hence, before constructing such a neural network, we first need to figure out a way to build a layer having this property: when an object is translated in an image, the output of the layer should be translated with the same translation. This is what we will do here.

Mathematical model

Here we formalize our problem and simplify it a little bit while keeping its main features. First, instead of images, we will deal with 1D signal x{\bf x} of length nn: x=(x0,,xn1){\bf x}=(x_0,\dots, x_{n-1}). Now translation in 1D is also called a shift: (Sx)i=xi1(S{\bf x})_{i} = x_{i-1} corresponds to the shift to the right. Note that we also need to define (Sx)0(S{\bf x})_0 in order to keep a signal of length nn. We will always deal with indices as integers modulo nn so that x1=xn1x_{-1} = x_{n-1} and we define Sx=(xn1,x0,,xn2)S{\bf x} = (x_{n-1}, x_0, \dots, x_{n-2}). Note that we can write SS as a n×nn\times n matrix:

S = \left( \begin{array}{ccccc} 0&\dots&\dots&0&1\\ 1&\ddots&&&0\\ 0&1&\ddots&&\vdots\\ \vdots &\ddots&\ddots&\ddots&\\ 0&\dots&0&1&0\end{array}\right)

The mathematical problem is now to find a linear layer which is equivariant with respect to the shift: when the input is shifted, the output is also shifted. Hence, we are looking for a n×nn\times n matrix WW with the shift invariance property:

WS=SW.

Learning a solution

There is a simple way to approximate a shift invariant layer from an arbitrary matrix WW: start from WW and then make it more and more shift invariant by decreasing WSSW22\|WS-SW\|_2^2. When this quantity is zero, we get a shift invariant matrix.

Here is a gradient descent algorithm to solve the problem:

\min_W \frac{\|WS-SW\|_2^2}{\|W\|_2^2}.

coded in Julia:

using LinearAlgebra, Zygote, Plots
+           Convolutions from first principles  

Convolutions (and Discrete Fourier Transform) from first principles

author: Marc Lelarge, course: dataflowr, module: Convolutional neural network

date: June 8, 2021

Motivation

In the module on CNN, we presented the convolutional layers as learnable filters. In particular, we have seen that these layers have a particular form of weight sharing (only the parameters of the kernel need to be learned). The motivation for restricting our attention to this particular weight sharing comes from a long history in signal processing. Here, we would like to recover the intuition for convolutions from first principles.

So let's pretend, we do not know anything about signal processing and we would like to build from scratch a new neural network taking as input an image and producing as output another image. For example in semantic segmentation, each pixel in the input image is linked to a class as shown below (source: DeepLab): gif

Clearly in this case, when an object moves in the image, we want the associated labels to move with it. Hence, before constructing such a neural network, we first need to figure out a way to build a layer having this property: when an object is translated in an image, the output of the layer should be translated with the same translation. This is what we will do here.

Mathematical model

Here we formalize our problem and simplify it a little bit while keeping its main features. First, instead of images, we will deal with 1D signal x{\bf x} of length nn: x=(x0,,xn1){\bf x}=(x_0,\dots, x_{n-1}). Now translation in 1D is also called a shift: (Sx)i=xi1(S{\bf x})_{i} = x_{i-1} corresponds to the shift to the right. Note that we also need to define (Sx)0(S{\bf x})_0 in order to keep a signal of length nn. We will always deal with indices as integers modulo nn so that x1=xn1x_{-1} = x_{n-1} and we define Sx=(xn1,x0,,xn2)S{\bf x} = (x_{n-1}, x_0, \dots, x_{n-2}). Note that we can write SS as a n×nn\times n matrix:

S = \left( \begin{array}{ccccc} 0&\dots&\dots&0&1\\ 1&\ddots&&&0\\ 0&1&\ddots&&\vdots\\ \vdots &\ddots&\ddots&\ddots&\\ 0&\dots&0&1&0\end{array}\right)

The mathematical problem is now to find a linear layer which is equivariant with respect to the shift: when the input is shifted, the output is also shifted. Hence, we are looking for a n×nn\times n matrix WW with the shift invariance property:

WS=SW.
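Before trying to learn such a W, a quick numpy check of what this property means (a generic dense layer does not satisfy it, while, for instance, powers of S trivially do):

import numpy as np

n = 5
S = np.roll(np.eye(n), 1, axis=0)                # shift matrix: S[i, j] = 1 iff j = i-1 (mod n)
x = np.random.randn(n)
assert np.allclose(S @ x, np.roll(x, 1))         # (Sx)_i = x_{i-1}: shift to the right

W_random = np.random.randn(n, n)
print(np.allclose(W_random @ S, S @ W_random))   # False: a generic layer is not shift equivariant
print(np.allclose((S @ S) @ S, S @ (S @ S)))     # True: e.g. W = S^2 commutes with S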

Learning a solution

There is a simple way to approximate a shift invariant layer from an arbitrary matrix WW: start from WW and then make it more and more shift invariant by decreasing WSSW22\|WS-SW\|_2^2. When this quantity is zero, we get a shift invariant matrix.

Here is a gradient descent algorithm to solve the problem:

\min_W \frac{\|WS-SW\|_2^2}{\|W\|_2^2}.

coded in Julia:

using LinearAlgebra, Zygote, Plots
 
 const n = 100
 S = circshift(Matrix{Float64}(I, n, n),(1,0))
@@ -91,4 +91,4 @@
 end
 plot(target, (-1.,1.)...,label="target")
 ylims!((-10,10))
-plot!(pred, (-1.,1.)...,label="pred")

training_plot

We see that we get a pretty good approximation of our target polynomial. Below is a gif showing the convergence of our network towards the target:

gif

By stacking convolutions with kernel of size 3, we obtained a network with a receptive field of size 9.

Thanks for reading!

Follow on twitter!

\ No newline at end of file +plot!(pred, (-1.,1.)...,label="pred")

training_plot

We see that we get a pretty good approximation of our target polynomial. Below is a gif showing the convergence of our network towards the target:

gif

By stacking convolutions with kernel of size 3, we obtained a network with a receptive field of size 9.

Thanks for reading!

Follow on twitter!

\ No newline at end of file diff --git a/modules/extras/GCN_inductivebias_spectral/index.html b/modules/extras/GCN_inductivebias_spectral/index.html index 791c061..d037d5b 100644 --- a/modules/extras/GCN_inductivebias_spectral/index.html +++ b/modules/extras/GCN_inductivebias_spectral/index.html @@ -1,4 +1,4 @@ - Dataflowr - Deep Learning DIY

Inductive bias in GCN: a spectral perspective

author: Marc Lelarge, course: dataflowr

run the code or open it in Colab

date: April 15, 2021

Here, we focus on Graph Convolution Networks (GCN) introduced by Kipf and Welling in their paper Semi-Supervised Classification with Graph Convolutional Networks. The GCN layer is one of the simplest Graph Neural Network layer defined by:

h_i^{(\ell+1)} = \frac{1}{d_i+1}h_i^{(\ell)}W^{(\ell)} + \sum_{j\sim i} \frac{h_j^{(\ell)}W^{(\ell)}}{\sqrt{(d_i+1)(d_j+1)}},

where iji\sim j means that nodes ii and jj are neighbors in the graph GG, did_i and djd_j are the respective degrees of nodes ii and jj (i.e. their number of neighbors in the graph) and hi()h_i^{(\ell)} is the embedding representation of node ii at layer \ell and W()W^{(\ell)} is a trainable weight matrix of shape [size_input_feature, size_output_feature].

The inductive bias of a learning algorithm is the set of assumptions that the learner uses to predict outputs given inputs that it has not encountered. For GCN, we argue that the inductive bias can be formulated as a simple spectral property of the algorithm: GCN acts as a low-pass filter. This argument follows from the recent works Simplifying Graph Convolutional Networks by Wu, Souza, Zhang, Fifty, Yu, Weinberger and Revisiting Graph Neural Networks: All We Have is Low-Pass Filters by NT and Maehara.

Here we will study a very simple case and relate the inductive bias of GCN to the property of the Fiedler vector of the graph. We'll consider the more general setting in a subsequent post.

Notations

We consider undirected graphs G=(V,E)G=(V,E) with nn vertices denoted by i,j[n]i,j \in [n]. iji\sim j means that nodes ii and jj are neighbors in GG, i.e. {i,j}E\{i,j\}\in E. We denote by AA its adjacency matrix and by DD the diagonal matrix of degrees. The vector of degrees is denoted by dd so that d=A1d= A1. The components of a vector xRnx\in \mathbb{R}^n are denoted xix_i but sometimes it is convenient to see the vector xx as a function from VV to R\mathbb{R} and use the notation x(i)x(i) instead of xix_i.

Community detection in the Karate Club

We'll start with an unsupervised problem: given one graph, find a partition of its node in communities. In this case, we make the hypothesis that individuals tend to associate and bond with similar others, which is known as homophily.

To study this problem, we will focus on the Zachary's karate club and try to recover the split of the club from the graph of connections. The pytorch-geometric library will be very convenient.

Note that GCN are not appropriate in an unsupervised setting as no learning is possible without any label on the vertices. However, this is not a problem here as we will not train the GCN! In more practical settings, GCN are used in a semi-supervised setting where a few labels are revealed for a few nodes (more on this in the section with the Cora dataset).

from torch_geometric.datasets import KarateClub
+           Dataflowr - Deep Learning DIY  

Inductive bias in GCN: a spectral perspective

author: Marc Lelarge, course: dataflowr

run the code or open it in Colab

date: April 15, 2021

Here, we focus on Graph Convolution Networks (GCN) introduced by Kipf and Welling in their paper Semi-Supervised Classification with Graph Convolutional Networks. The GCN layer is one of the simplest Graph Neural Network layer defined by:

h_i^{(\ell+1)} = \frac{1}{d_i+1}h_i^{(\ell)}W^{(\ell)} + \sum_{j\sim i} \frac{h_j^{(\ell)}W^{(\ell)}}{\sqrt{(d_i+1)(d_j+1)}},

where iji\sim j means that nodes ii and jj are neighbors in the graph GG, did_i and djd_j are the respective degrees of nodes ii and jj (i.e. their number of neighbors in the graph) and hi()h_i^{(\ell)} is the embedding representation of node ii at layer \ell and W()W^{(\ell)} is a trainable weight matrix of shape [size_input_feature, size_output_feature].
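As a sanity check of this formula, here is a minimal dense-matrix sketch of one GCN layer (an illustration only; in practice one uses a sparse implementation such as the one provided by pytorch-geometric):

import torch

def gcn_layer(H, A, W):
    # H: (n, d_in) node features, A: (n, n) float adjacency matrix (0/1), W: (d_in, d_out)
    n = A.shape[0]
    A_hat = A + torch.eye(n)                         # add self-loops
    d_hat = A_hat.sum(dim=1)                         # degrees d_i + 1
    D_inv_sqrt = torch.diag(d_hat.pow(-0.5))
    return D_inv_sqrt @ A_hat @ D_inv_sqrt @ H @ W   # matches the normalization above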

The inductive bias of a learning algorithm is the set of assumptions that the learner uses to predict outputs given inputs that it has not encountered. For GCN, we argue that the inductive bias can be formulated as a simple spectral property of the algorithm: GCN acts as a low-pass filter. This argument follows from the recent works Simplifying Graph Convolutional Networks by Wu, Souza, Zhang, Fifty, Yu, Weinberger and Revisiting Graph Neural Networks: All We Have is Low-Pass Filters by NT and Maehara.

Here we will study a very simple case and relate the inductive bias of GCN to the property of the Fiedler vector of the graph. We'll consider the more general setting in a subsequent post.

Notations

We consider undirected graphs G=(V,E)G=(V,E) with nn vertices denoted by i,j[n]i,j \in [n]. iji\sim j means that nodes ii and jj are neighbors in GG, i.e. {i,j}E\{i,j\}\in E. We denote by AA its adjacency matrix and by DD the diagonal matrix of degrees. The vector of degrees is denoted by dd so that d=A1d= A1. The components of a vector xRnx\in \mathbb{R}^n are denoted xix_i but sometimes it is convenient to see the vector xx as a function from VV to R\mathbb{R} and use the notation x(i)x(i) instead of xix_i.

Community detection in the Karate Club

We'll start with an unsupervised problem: given one graph, find a partition of its node in communities. In this case, we make the hypothesis that individuals tend to associate and bond with similar others, which is known as homophily.

To study this problem, we will focus on the Zachary's karate club and try to recover the split of the club from the graph of connections. The pytorch-geometric library will be very convenient.

Note that GCN are not appropriate in an unsupervised setting as no learning is possible without any label on the vertices. However, this is not a problem here as we will not train the GCN! In more practical settings, GCN are used in a semi-supervised setting where a few labels are revealed for a few nodes (more on this in the section with the Cora dataset).

from torch_geometric.datasets import KarateClub
 
 dataset = KarateClub()
 print(f'Dataset: {dataset}:')
@@ -194,7 +194,7 @@ 


diff --git a/modules/extras/clip/diagram.png b/modules/extras/clip/diagram.png new file mode 100644 index 0000000..a1b5ec9 Binary files /dev/null and b/modules/extras/clip/diagram.png differ diff --git a/modules/extras/graph_invariant/index.html b/modules/extras/graph_invariant/index.html index 6db989b..d43fa1c 100644 --- a/modules/extras/graph_invariant/index.html +++ b/modules/extras/graph_invariant/index.html @@ -1 +1 @@ - Exploiting Graph Invariants in Deep Learning

Exploiting Graph Invariants in Deep Learning


0:48 Skip the french part!
\ No newline at end of file + Exploiting Graph Invariants in Deep Learning

Exploiting Graph Invariants in Deep Learning


0:48 Skip the french part!
\ No newline at end of file diff --git a/modules/extras/invariant_equivariant/index.html b/modules/extras/invariant_equivariant/index.html index 9a0d0f5..104de26 100644 --- a/modules/extras/invariant_equivariant/index.html +++ b/modules/extras/invariant_equivariant/index.html @@ -1 +1 @@ - Invariant and Equivariant layers

Invariant and equivariant layers with applications to GNN, PointNet and Transformers

author: Marc Lelarge, course: dataflowr

date: April 23, 2021

Invariant and equivariant functions

As shown in the module on GNN, invariant and equivariant functions are crucial for GNN. For example, the message passing GNN (MGNN) layer is defined by:

h^{\ell+1}_i = f(h^\ell_i , \{\{ h^\ell_j\}\}_{j\sim i}),

where iji\sim j means that nodes ii and jj are neighbors and the function ff should not depend on the order of the elements in the multiset {{hj}}ji\{\{ h^\ell_j\}\}_{j\sim i}. This layer is applied in parallel to all nodes (with the same function ff) producing a mapping from h=(h1,hn){\bf h}^\ell = (h^\ell_1\dots, h^\ell_n) to F(h)=h+1F({\bf h}^\ell) = {\bf h}^{\ell+1} with F:RnRnF:\mathbb{R}^n \to \mathbb{R}^n where nn is the number of nodes in the graph (and only real hidden states are considered for simplicity). It is easy to see that FF is an equivariant function, i.e. permuting its input will permute its output.

Another example of invariant and equivariant functions is given by the attention layer Attention(Q,K,V)=Z\text{Attention}(Q,K,V) = Z defined for QQ a tensor of row queries, KK the keys and VV the values, Q,K,VRn×dQ,K,V\in \mathbb{R}^{n\times d} by

Z_j = \sum_{i=1}^n \text{softmax}_i(Q_jK_i^T) V_i.

The queries are obtained from a tensor XRn×cX\in \mathbb{R}^{n\times c} by Q=XWQTQ= XW_Q^T and the keys and values are obtained from a tensor XRn×cX' \in \mathbb{R}^{n\times c'} by K=XWKTK = X' W_K^T and V=XWVTV = X' W_V^T. We see that when the queries are fixed, the attention layer is invariant in the pair (keys, values):

Z_j = \sum_{i=1}^n \text{softmax}_{i}(Q_j K_{\sigma(i)}^T) V_{\sigma(i)},

hence Attention(X,X)\text{Attention}(X,X') is invariant in XX'. Similarly, when the pair (keys, values) is fixed, the attention layer is equivariant in the queries:

Z_{\sigma(j)} = \sum_{i=1}^n \text{softmax}_{i}(Q_{\sigma(j)}K_{i}^T) V_{i},

hence Attention(X,X)\text{Attention}(X,X') is equivariant in XX. If X=XX'=X, we get the self-attention layer so that SelfAttention(X)=Attention(X,X)\text{SelfAttention}(X) = \text{Attention}(X,X) is equivariant in XX.

In this post, we will characterize invariant and equivariant functions following the ideas given in the paper Deep Sets.

Representation of invariant and equivariant functions

We start with some definitions.

For a vector x=(x1,,xn)Rn{\bf x} = (x_1,\dots, x_n)\in \mathbb{R}^n and a permutation σSn\sigma \in \mathcal{S}_n, we define

\sigma \star {\bf x} = (x_{\sigma^{-1}(1)},\dots, x_{\sigma^{-1}(n)})

Definitions:

  • A function f:RnRf:\mathbb{R}^n\to \mathbb{R} is invariant if for all x{\bf x} and all σSn\sigma \in \mathcal{S}_n, we have f(σx)=f(x)f(\sigma \star {\bf x}) = f({\bf x}).

  • A function f:RnRnf:\mathbb{R}^n\to \mathbb{R}^n is equivariant if for all x{\bf x} and all σSn\sigma \in \mathcal{S}_n, we have f(σx)=σf(x)f(\sigma \star {\bf x}) = \sigma \star f({\bf x}).

We can now state our main result:

Theorem

  • invariant case: let $f:[0,1]^n \to \mathbb{R}$ be a continuous function. $f$ is invariant if and only if there are continuous functions $\phi: [0,1] \to \mathbb{R}^n$ and $\rho: \mathbb{R}^n \to \mathbb{R}$ such that

$$f({\bf x}) = \rho\left( \sum_{i=1}^n \phi(x_i)\right)$$

  • equivariant case: let $f:[0,1]^n \to \mathbb{R}^n$ be a continuous function. $f$ is equivariant if and only if there are continuous functions $\phi: [0,1] \to \mathbb{R}^n$ and $\rho: [0,1]\times \mathbb{R}^n \to \mathbb{R}$ such that

$$f_j({\bf x}) = \rho\left( x_j, \sum_{i=1}^n \phi(x_i)\right)$$

We give some remarks before providing the proof below. For the sake of simplicity, we consider here a fixed number of points $n$ on the unit interval $[0,1]$. For results with a varying number of points, see On the Limitations of Representing Functions on Sets, and for points in higher dimension $[0,1]^d$ with $d>1$, see On Universal Equivariant Set Networks and Expressive Power of Invariant and Equivariant Graph Neural Networks.

Our proof will make the mapping $\phi$ explicit, and it will not depend on the function $f$. The mapping $\phi$ can be seen as an embedding of the points of $[0,1]$ into a high-dimensional space. Indeed, this embedding space has to be of dimension at least the number of points $n$ in order to ensure universality. This is an important remark: in a learning scenario, the size of the embedding is typically fixed, and this will limit the expressiveness of the algorithm.
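In code, the parametrization given by the theorem is exactly the Deep Sets architecture: a pointwise embedding $\phi$, a sum, and a readout $\rho$. Below is a minimal PyTorch sketch (not the course's implementation; the MLPs and hidden sizes are arbitrary assumptions standing in for $\phi$ and $\rho$), together with the corresponding permutation checks.

```python
import torch
import torch.nn as nn

n, emb = 7, 16

phi = nn.Sequential(nn.Linear(1, emb), nn.ReLU(), nn.Linear(emb, emb))
rho_inv = nn.Sequential(nn.Linear(emb, emb), nn.ReLU(), nn.Linear(emb, 1))
rho_equiv = nn.Sequential(nn.Linear(1 + emb, emb), nn.ReLU(), nn.Linear(emb, 1))

def f_invariant(x):
    # f(x) = rho(sum_i phi(x_i))
    return rho_inv(phi(x.unsqueeze(-1)).sum(dim=0))

def f_equivariant(x):
    # f_j(x) = rho(x_j, sum_i phi(x_i)): each output uses its own x_j plus a shared invariant summary
    s = phi(x.unsqueeze(-1)).sum(dim=0)
    return rho_equiv(torch.cat([x.unsqueeze(-1), s.expand(x.shape[0], -1)], dim=-1)).squeeze(-1)

x = torch.rand(n)
sigma = torch.randperm(n)
print(torch.allclose(f_invariant(x[sigma]), f_invariant(x), atol=1e-6))              # invariant
print(torch.allclose(f_equivariant(x[sigma]), f_equivariant(x)[sigma], atol=1e-6))   # equivariant
```

Such a network is of the right form by construction; as noted above, with a fixed (finite) embedding dimension its expressiveness is limited when the number of points exceeds the embedding size.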

Coming back to the GNN layer (1), our result on the invariant case tells us that we can always rewrite it as:

$$h^{\ell+1}_i = \rho\left( h_i^{\ell},\ \sum_{j\sim i} \phi(h^\ell_j)\right),$$

and the dimension of the embedding $\phi(h)$ needs to be of the same order as the maximum degree in the graph. Note that (8) is not of the form (7), as the sum inside the $\rho$ function is taken only over neighbors. Indeed, we know that message passing GNNs are not universal (see Expressive Power of Invariant and Equivariant Graph Neural Networks).

As a last remark, note that the original PointNet architecture $f$ is of the form $f_i({\bf x}) = \rho(x_i)$, which is not universal equivariant. Indeed, it is impossible to approximate the equivariant function $g_i({\bf x}) = \sum_i x_i$, as shown below (we denote ${\bf e}_1=(1,0,\dots,0)$):

$$\|f(0) - g(0)\|^2 = n \rho(0)^2,\qquad \|f({\bf e}_1) - g({\bf e}_1)\|^2 = (\rho(1)-1)^2 + (n-1)(\rho(0)-1)^2 \geq (n-1)(\rho(0)-1)^2,$$

and these two quantities cannot both be small. Hence PointNet is not universal equivariant, but as shown in On Universal Equivariant Set Networks, modifying PointNet by adding the term $\sum_{i=1}^n \phi(x_i)$ inside the $\rho$ function as in (7) makes it universal equivariant. We refer to Are Transformers universal approximators of sequence-to-sequence functions? for similar results about transformers based on self-attention.

Proof of the Theorem

We first show that the equivariant case is not more difficult than the invariant case. Assume that we have proved the invariant case. Consider a permutation $\sigma\in \mathcal{S}_n$ such that $\sigma(1)=1$, so that $f(\sigma \star {\bf x}) = \sigma \star f({\bf x})$ gives for the first component:

$$f_1(x_1,x_{\sigma(2)},\dots, x_{\sigma(n)}) = f_1(x_1,x_2,\dots, x_n).$$

Hence, for any fixed $x_1$, the mapping $(x_2,\dots, x_n) \mapsto f_1(x_1, x_2,\dots, x_n)$ is invariant, and by (6) we have

$$f_1(x_1,x_2,\dots, x_n) = \rho\left(x_1, \sum_{i\neq 1}\phi(x_i) \right).$$

Now consider the permutation $\sigma$ such that $\sigma(1)=k$, $\sigma(k)=1$ and $\sigma(i)=i$ for $i\neq 1,k$; then we have

$$f_k(x_1,x_2,\dots, x_n) = f_1(x_k,x_2,\dots, x_1,\dots, x_n),$$

hence $f_k(x_1,x_2,\dots, x_n)=\rho\left(x_k, \sum_{i\neq k}\phi(x_i) \right)$ and (7) follows.

Hence, we only need to prove (6), and we follow the proof given in Deep Sets. We start with a crucial result stating that a set of $n$ real points is characterized by the first $n$ moments of its empirical measure. Let us see what this means for $n=2$: we can recover the values of $x_1$ and $x_2$ from the quantities $p_1=x_1+x_2$ and $p_2=x_1^2+x_2^2$. To see that this is correct, note that

$$p_1^2 = x_1^2+2x_1x_2+x_2^2 = p_2+2x_1x_2,$$

so that $x_1x_2 = \frac{p_1^2-p_2}{2}$. As a result, we have

$$(x-x_1)(x-x_2) = x^2-p_1x+\frac{p_1^2-p_2}{2},$$

and clearly $x_1$ and $x_2$ can be recovered as the roots of this polynomial, whose coefficients are functions of $p_1$ and $p_2$. The result below extends this argument to a general $n$:

Proposition

The map $\Phi:[0,1]_{\leq}^n \to \mathbb{R}^{n}$, where $[0,1]_{\leq}^n = \{ {\bf x}\in [0,1]^n:\ x_1\leq x_2\leq \dots\leq x_n\}$, defined by

$$\Phi(x_1,x_2,\dots, x_n) = \left( \sum_i x_i,\ \sum_i x_i^2,\ \dots,\ \sum_i x_i^n\right)$$

is injective and has a continuous inverse mapping.

The proof follows from Newton's identities. For $k\leq n$, we denote by $p_k = \sum_{i=1}^n x_i^k$ the power sums and by $e_k$ the elementary symmetric polynomials (note that all these polynomials are functions of $x_1,\dots, x_n$):

$$e_0 = 1,\qquad e_1 = \sum_i x_i,\qquad e_2 = \sum_{i < j} x_i x_j,\qquad \dots$$

From Newton's identities, we have for $k\leq n$,

$$k\, e_k = \sum_{i=1}^k (-1)^{i-1}e_{k-i}\,p_i,$$

so that we can express the elementary symmetric polynomials in terms of the power sums:

$$e_1 = p_1,\qquad 2e_2 = e_1p_1-p_2 = p_1^2-p_2,\qquad 3e_3 = e_2p_1-e_1p_2+p_3 = \tfrac{1}{2}p_1^3-\tfrac{3}{2}p_1p_2+p_3,\qquad \dots$$

Note that $\Phi(x_1,x_2,\dots, x_n) = (p_1,\dots, p_n)$ and since

$$\prod_{i=1}^n (x-x_i) = x^n -e_1x^{n-1}+e_2x^{n-2}-\dots + (-1)^n e_n,$$

if $\Phi({\bf x}) = \Phi({\bf y})$ then $\prod_{i=1}^n (x-x_i)=\prod_{i=1}^n (x-y_i)$, so that $\{\{x_1,\dots, x_n\}\} = \{\{y_1,\dots, y_n\}\}$ and ${\bf x}={\bf y} \in [0,1]^n_{\leq}$, showing that $\Phi$ is injective.

Hence we proved that $\Phi:[0,1]^n_{\leq} \to \text{Im}(\Phi)$, where $\text{Im}(\Phi)$ is the image of $\Phi$, is a bijection. We now need to prove that $\Phi^{-1}$ is continuous, and we prove it directly. Let ${\bf y}_k \to {\bf y} \in \text{Im}(\Phi)$; we need to show that $\Phi^{-1}({\bf y}_k) \to \Phi^{-1}({\bf y})$. If $\Phi^{-1}({\bf y}_k) \not\to \Phi^{-1}({\bf y})$, then since $[0,1]^n_{\leq}$ is compact, there exists a convergent subsequence with $\Phi^{-1}({\bf y}_{m_k}) \to {\bf x}\neq \Phi^{-1}({\bf y})$. But by continuity of $\Phi$, we have ${\bf y}_{m_k} \to \Phi({\bf x})$, so that $\Phi({\bf x}) = {\bf y}$ and hence ${\bf x} = \Phi^{-1}({\bf y})$, a contradiction. This proves the continuity of $\Phi^{-1}$, finishing the proof of the proposition.
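The inverse map is also easy to compute in practice, which gives a quick numerical sanity check of the proposition. Below is a small sketch (not from the course materials; the number of points, the random seed and the tolerance are arbitrary): the elementary symmetric polynomials are recovered from the power sums via Newton's identities above, and the points are then the roots of the resulting polynomial.

```python
import numpy as np

def Phi(x):
    # power sums p_k = sum_i x_i^k for k = 1..n
    n = len(x)
    return np.array([np.sum(np.asarray(x) ** k) for k in range(1, n + 1)])

def Phi_inverse(p):
    # recover the e_k from the p_k via Newton's identities k e_k = sum_i (-1)^(i-1) e_{k-i} p_i,
    # then the points as the roots of x^n - e_1 x^(n-1) + ... + (-1)^n e_n
    n = len(p)
    e = [1.0]
    for k in range(1, n + 1):
        e.append(sum((-1) ** (i - 1) * e[k - i] * p[i - 1] for i in range(1, k + 1)) / k)
    coeffs = [(-1) ** k * e[k] for k in range(n + 1)]   # coefficients of prod_i (x - x_i)
    return np.sort(np.roots(coeffs).real)

rng = np.random.default_rng(0)
x = np.sort(rng.random(5))
print(np.allclose(Phi_inverse(Phi(x)), x, atol=1e-6))   # True: x is recovered from its n power sums
```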

We are now ready to prove (6). Let $\phi:[0,1] \to \mathbb{R}^n$ be defined by $\phi(x) = (x,x^2,\dots, x^n)$ and let $\rho = f\circ \Phi^{-1}$. Note that $\rho: \text{Im}(\Phi) \to \mathbb{R}$ and $\sum_{i}\phi(x_i) = \Phi({\bf x}_{\leq})$, where ${\bf x}_{\leq}$ is the vector ${\bf x}$ with its components sorted in non-decreasing order. Hence, as soon as $f$ is invariant, we have $f({\bf x}) = f({\bf x}_{\leq})$, so that (6) is valid. We only need to extend the function $\rho$ from the domain $\text{Im}(\Phi)$ to $\mathbb{R}^n$ in a continuous way. This can be done by considering the projection $\pi$ onto the compact set $\text{Im}(\Phi)$ and defining $\rho({\bf x}) = f\circ \Phi^{-1}(\pi({\bf x}))$.

Follow on twitter!

Thanks for reading!

\ No newline at end of file + Invariant and Equivariant layers

Invariant and equivariant layers with applications to GNN, PointNet and Transformers

author: Marc Lelarge, course: dataflowr

date: April 23, 2021

Invariant and equivariant functions

As shown in the module on GNN, invariant and equivariant functions are crucial for GNN. For example, the message passing GNN (MGNN) layer is defined by:

$$h^{\ell+1}_i = f\big(h^\ell_i,\ \{\{ h^\ell_j \}\}_{j\sim i}\big),$$

where $i\sim j$ means that nodes $i$ and $j$ are neighbors and the function $f$ should not depend on the order of the elements in the multiset $\{\{ h^\ell_j \}\}_{j\sim i}$. This layer is applied in parallel to all nodes (with the same function $f$), producing a mapping from ${\bf h}^\ell = (h^\ell_1,\dots, h^\ell_n)$ to $F({\bf h}^\ell) = {\bf h}^{\ell+1}$ with $F:\mathbb{R}^n \to \mathbb{R}^n$, where $n$ is the number of nodes in the graph (and only real hidden states are considered for simplicity). It is easy to see that $F$ is an equivariant function, i.e. permuting its input will permute its output.

Another example of invariant and equivariant functions is given by the attention layer $\text{Attention}(Q,K,V) = Z$, defined for $Q$ a tensor of row queries, $K$ the keys and $V$ the values, with $Q,K,V \in \mathbb{R}^{n\times d}$, by

$$Z_j = \sum_{i=1}^n \text{softmax}_i(Q_j K_i^T)\, V_i.$$

The queries are obtained from a tensor $X \in \mathbb{R}^{n\times c}$ by $Q = X W_Q^T$, and the keys and values are obtained from a tensor $X' \in \mathbb{R}^{n\times c'}$ by $K = X' W_K^T$ and $V = X' W_V^T$. We see that when the queries are fixed, the attention layer is invariant in the pair (keys, values):

$$Z_j = \sum_{i=1}^n \text{softmax}_{i}(Q_j K_{\sigma(i)}^T)\, V_{\sigma(i)},$$

hence $\text{Attention}(X,X')$ is invariant in $X'$. Similarly, when the pair (keys, values) is fixed, the attention layer is equivariant in the queries:

$$Z_{\sigma(j)} = \sum_{i=1}^n \text{softmax}_{i}(Q_{\sigma(j)} K_{i}^T)\, V_{i},$$

hence $\text{Attention}(X,X')$ is equivariant in $X$. If $X'=X$, we get the self-attention layer, so that $\text{SelfAttention}(X) = \text{Attention}(X,X)$ is equivariant in $X$.

In this post, we will characterize invariant and equivariant functions following the ideas given in the paper Deep Sets.

Representation of invariant and equivariant functions

We start with some definitions.

For a vector ${\bf x} = (x_1,\dots, x_n) \in \mathbb{R}^n$ and a permutation $\sigma \in \mathcal{S}_n$, we define

$$\sigma \star {\bf x} = (x_{\sigma^{-1}(1)},\dots, x_{\sigma^{-1}(n)}).$$

Definitions:

  • A function $f:\mathbb{R}^n\to \mathbb{R}$ is invariant if for all ${\bf x}$ and all $\sigma \in \mathcal{S}_n$, we have $f(\sigma \star {\bf x}) = f({\bf x})$.

  • A function $f:\mathbb{R}^n\to \mathbb{R}^n$ is equivariant if for all ${\bf x}$ and all $\sigma \in \mathcal{S}_n$, we have $f(\sigma \star {\bf x}) = \sigma \star f({\bf x})$.

We can now state our main result:

Theorem

  • invariant case: let $f:[0,1]^n \to \mathbb{R}$ be a continuous function. $f$ is invariant if and only if there are continuous functions $\phi: [0,1] \to \mathbb{R}^n$ and $\rho: \mathbb{R}^n \to \mathbb{R}$ such that

$$f({\bf x}) = \rho\left( \sum_{i=1}^n \phi(x_i)\right)$$

  • equivariant case: let $f:[0,1]^n \to \mathbb{R}^n$ be a continuous function. $f$ is equivariant if and only if there are continuous functions $\phi: [0,1] \to \mathbb{R}^n$ and $\rho: [0,1]\times \mathbb{R}^n \to \mathbb{R}$ such that

$$f_j({\bf x}) = \rho\left( x_j, \sum_{i=1}^n \phi(x_i)\right)$$

We give some remarks before providing the proof below. For the sake of simplicity, we consider here a fixed number of points $n$ on the unit interval $[0,1]$. For results with a varying number of points, see On the Limitations of Representing Functions on Sets, and for points in higher dimension $[0,1]^d$ with $d>1$, see On Universal Equivariant Set Networks and Expressive Power of Invariant and Equivariant Graph Neural Networks.

Our proof will make the mapping $\phi$ explicit, and it will not depend on the function $f$. The mapping $\phi$ can be seen as an embedding of the points of $[0,1]$ into a high-dimensional space. Indeed, this embedding space has to be of dimension at least the number of points $n$ in order to ensure universality. This is an important remark: in a learning scenario, the size of the embedding is typically fixed, and this will limit the expressiveness of the algorithm.

Coming back to the GNN layer (1), our result on the invariant case tells us that we can always rewrite it as:

$$h^{\ell+1}_i = \rho\left( h_i^{\ell},\ \sum_{j\sim i} \phi(h^\ell_j)\right),$$

and the dimension of the embedding $\phi(h)$ needs to be of the same order as the maximum degree in the graph. Note that (8) is not of the form (7), as the sum inside the $\rho$ function is taken only over neighbors. Indeed, we know that message passing GNNs are not universal (see Expressive Power of Invariant and Equivariant Graph Neural Networks).

As a last remark, note that the original PointNet architecture $f$ is of the form $f_i({\bf x}) = \rho(x_i)$, which is not universal equivariant. Indeed, it is impossible to approximate the equivariant function $g_i({\bf x}) = \sum_i x_i$, as shown below (we denote ${\bf e}_1=(1,0,\dots,0)$):

$$\|f(0) - g(0)\|^2 = n \rho(0)^2,\qquad \|f({\bf e}_1) - g({\bf e}_1)\|^2 = (\rho(1)-1)^2 + (n-1)(\rho(0)-1)^2 \geq (n-1)(\rho(0)-1)^2,$$

and these two quantities cannot both be small. Hence PointNet is not universal equivariant, but as shown in On Universal Equivariant Set Networks, modifying PointNet by adding the term $\sum_{i=1}^n \phi(x_i)$ inside the $\rho$ function as in (7) makes it universal equivariant. We refer to Are Transformers universal approximators of sequence-to-sequence functions? for similar results about transformers based on self-attention.

Proof of the Theorem

We first show that the equivariant case is not more difficult than the invariant case. Assume that we have proved the invariant case. Consider a permutation $\sigma\in \mathcal{S}_n$ such that $\sigma(1)=1$, so that $f(\sigma \star {\bf x}) = \sigma \star f({\bf x})$ gives for the first component:

$$f_1(x_1,x_{\sigma(2)},\dots, x_{\sigma(n)}) = f_1(x_1,x_2,\dots, x_n).$$

Hence, for any fixed $x_1$, the mapping $(x_2,\dots, x_n) \mapsto f_1(x_1, x_2,\dots, x_n)$ is invariant, and by (6) we have

$$f_1(x_1,x_2,\dots, x_n) = \rho\left(x_1, \sum_{i\neq 1}\phi(x_i) \right).$$

Now consider the permutation $\sigma$ such that $\sigma(1)=k$, $\sigma(k)=1$ and $\sigma(i)=i$ for $i\neq 1,k$; then we have

$$f_k(x_1,x_2,\dots, x_n) = f_1(x_k,x_2,\dots, x_1,\dots, x_n),$$

hence $f_k(x_1,x_2,\dots, x_n)=\rho\left(x_k, \sum_{i\neq k}\phi(x_i) \right)$ and (7) follows.

Hence, we only need to prove (6), and we follow the proof given in Deep Sets. We start with a crucial result stating that a set of $n$ real points is characterized by the first $n$ moments of its empirical measure. Let us see what this means for $n=2$: we can recover the values of $x_1$ and $x_2$ from the quantities $p_1=x_1+x_2$ and $p_2=x_1^2+x_2^2$. To see that this is correct, note that

$$p_1^2 = x_1^2+2x_1x_2+x_2^2 = p_2+2x_1x_2,$$

so that $x_1x_2 = \frac{p_1^2-p_2}{2}$. As a result, we have

$$(x-x_1)(x-x_2) = x^2-p_1x+\frac{p_1^2-p_2}{2},$$

and clearly $x_1$ and $x_2$ can be recovered as the roots of this polynomial, whose coefficients are functions of $p_1$ and $p_2$. The result below extends this argument to a general $n$:

Proposition

The map $\Phi:[0,1]_{\leq}^n \to \mathbb{R}^{n}$, where $[0,1]_{\leq}^n = \{ {\bf x}\in [0,1]^n:\ x_1\leq x_2\leq \dots\leq x_n\}$, defined by

$$\Phi(x_1,x_2,\dots, x_n) = \left( \sum_i x_i,\ \sum_i x_i^2,\ \dots,\ \sum_i x_i^n\right)$$

is injective and has a continuous inverse mapping.

The proof follows from Newton's identities. For $k\leq n$, we denote by $p_k = \sum_{i=1}^n x_i^k$ the power sums and by $e_k$ the elementary symmetric polynomials (note that all these polynomials are functions of $x_1,\dots, x_n$):

$$e_0 = 1,\qquad e_1 = \sum_i x_i,\qquad e_2 = \sum_{i < j} x_i x_j,\qquad \dots$$

From Newton's identities, we have for $k\leq n$,

$$k\, e_k = \sum_{i=1}^k (-1)^{i-1}e_{k-i}\,p_i,$$

so that we can express the elementary symmetric polynomials in terms of the power sums:

$$e_1 = p_1,\qquad 2e_2 = e_1p_1-p_2 = p_1^2-p_2,\qquad 3e_3 = e_2p_1-e_1p_2+p_3 = \tfrac{1}{2}p_1^3-\tfrac{3}{2}p_1p_2+p_3,\qquad \dots$$

Note that $\Phi(x_1,x_2,\dots, x_n) = (p_1,\dots, p_n)$ and since

$$\prod_{i=1}^n (x-x_i) = x^n -e_1x^{n-1}+e_2x^{n-2}-\dots + (-1)^n e_n,$$

if $\Phi({\bf x}) = \Phi({\bf y})$ then $\prod_{i=1}^n (x-x_i)=\prod_{i=1}^n (x-y_i)$, so that $\{\{x_1,\dots, x_n\}\} = \{\{y_1,\dots, y_n\}\}$ and ${\bf x}={\bf y} \in [0,1]^n_{\leq}$, showing that $\Phi$ is injective.

Hence we proved that $\Phi:[0,1]^n_{\leq} \to \text{Im}(\Phi)$, where $\text{Im}(\Phi)$ is the image of $\Phi$, is a bijection. We now need to prove that $\Phi^{-1}$ is continuous, and we prove it directly. Let ${\bf y}_k \to {\bf y} \in \text{Im}(\Phi)$; we need to show that $\Phi^{-1}({\bf y}_k) \to \Phi^{-1}({\bf y})$. If $\Phi^{-1}({\bf y}_k) \not\to \Phi^{-1}({\bf y})$, then since $[0,1]^n_{\leq}$ is compact, there exists a convergent subsequence with $\Phi^{-1}({\bf y}_{m_k}) \to {\bf x}\neq \Phi^{-1}({\bf y})$. But by continuity of $\Phi$, we have ${\bf y}_{m_k} \to \Phi({\bf x})$, so that $\Phi({\bf x}) = {\bf y}$ and hence ${\bf x} = \Phi^{-1}({\bf y})$, a contradiction. This proves the continuity of $\Phi^{-1}$, finishing the proof of the proposition.

We are now ready to prove (6). Let $\phi:[0,1] \to \mathbb{R}^n$ be defined by $\phi(x) = (x,x^2,\dots, x^n)$ and let $\rho = f\circ \Phi^{-1}$. Note that $\rho: \text{Im}(\Phi) \to \mathbb{R}$ and $\sum_{i}\phi(x_i) = \Phi({\bf x}_{\leq})$, where ${\bf x}_{\leq}$ is the vector ${\bf x}$ with its components sorted in non-decreasing order. Hence, as soon as $f$ is invariant, we have $f({\bf x}) = f({\bf x}_{\leq})$, so that (6) is valid. We only need to extend the function $\rho$ from the domain $\text{Im}(\Phi)$ to $\mathbb{R}^n$ in a continuous way. This can be done by considering the projection $\pi$ onto the compact set $\text{Im}(\Phi)$ and defining $\rho({\bf x}) = f\circ \Phi^{-1}(\pi({\bf x}))$.

Follow on twitter!

Thanks for reading!

\ No newline at end of file diff --git a/modules/extras/jupyterlab/index.html b/modules/extras/jupyterlab/index.html index 37ebca4..01b9fc0 100644 --- a/modules/extras/jupyterlab/index.html +++ b/modules/extras/jupyterlab/index.html @@ -1,4 +1,4 @@ - Dataflowr - Deep Learning DIY

JupyterLab

This post explains how to install and configure JupyterLab.

Installation

If you are using virtual environments, it's preferable to install JupyterLab outside of any virtual environment and to add the environments later as kernels.

JupyterLab can be installed from pip:

pip3 install jupyterlab
+ Dataflowr - Deep Learning DIY

JupyterLab

This post explains how to install and configure JupyterLab.

Installation

If you are using virtual environments, it's preferable to install JupyterLab outside of any virtual environment and to add the environments later as kernels.

JupyterLab can be installed from pip:

pip3 install jupyterlab

Then launch it with the following command:

jupyter-lab

If you are used to using tmux, you can run JupyterLab in the background with the following command:

@@ -67,7 +67,7 @@

Edit this page on - Last modified: September 15, 2023. Website built with Franklin.jl and the Julia programming language. + Last modified: November 12, 2023. Website built with Franklin.jl and the Julia programming language.

diff --git a/modules/graph0/index.html b/modules/graph0/index.html index 019ea54..a23d527 100644 --- a/modules/graph0/index.html +++ b/modules/graph0/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY \ No newline at end of file + Dataflowr - Deep Learning DIY \ No newline at end of file diff --git a/modules/graph1/index.html b/modules/graph1/index.html index 3b21f63..45113c9 100644 --- a/modules/graph1/index.html +++ b/modules/graph1/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module - Deep Learning on graphs (1)

Table of Contents

Node embedding


0:00 Introduction
2:12 Language model
5:04 Skip-gram model
8:44 Hierarchical softmax
11:19 DeepWalk
14:26 Negative sampling
19:10 node2vec
22:28 results on les Misérables
25:10 results for multi-label classification

Slides

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module - Deep Learning on graphs (1)

Table of Contents

Node embedding


0:00 Introduction
2:12 Language model
5:04 Skip-gram model
8:44 Hierarchical softmax
11:19 DeepWalk
14:26 Negative sampling
19:10 node2vec
22:28 results on les Misérables
25:10 results for multi-label classification

Slides

\ No newline at end of file diff --git a/modules/graph2/index.html b/modules/graph2/index.html index 1a2eb87..0277c17 100644 --- a/modules/graph2/index.html +++ b/modules/graph2/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module - Deep Learning on graphs (2)

Table of Contents

Signal processing on graphs


0:00 Introduction
1:40 Signal processing on graphs
3:04 Recap on Fourier analysis
5:04 Spectral graph theory
13:44 Graph Fourier analysis
16:38 Filtering
18:33 Filtering on graphs
22:01 Learning a localized kernel
25:03 Chebyshev polynomials
30:28 Convolutional neural networks on graphs

Slides

Notebook

Posts

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module - Deep Learning on graphs (2)

Table of Contents

Signal processing on graphs


0:00 Introduction
1:40 Signal processing on graphs
3:04 Recap on Fourier analysis
5:04 Spectral graph theory
13:44 Graph Fourier analysis
16:38 Filtering
18:33 Filtering on graphs
22:01 Learning a localized kernel
25:03 Chebyshev polynomials
30:28 Convolutional neural networks on graphs

Slides

Notebook

Posts

\ No newline at end of file diff --git a/modules/graph3/index.html b/modules/graph3/index.html index b0b7852..e94cc1a 100644 --- a/modules/graph3/index.html +++ b/modules/graph3/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module - Deep Learning on graphs (3)

Table of Contents

Graph embedding


0:00 Introduction
1:30 Graph embedding
2:43 How to represent graphs?
3:58 Why graph symmetries matter?
8:25 Invariant and equivariant functions
12:30 Message passing GNN
16:02 The many flavors of MGNN
20:00 Separating power
22:51 2-Weisfeiler-Lehman test
26:59 How powerful are MGNN
28:27 Empirical results
29:10 Graphs as higher order tensors
31:45 Invariant and equivariant linear operator
35:47 Invariant linear GNN
38:18 Folklore GNN

Slides

Post

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module - Deep Learning on graphs (3)

Table of Contents

Graph embedding


0:00 Introduction
1:30 Graph embedding
2:43 How to represent graphs?
3:58 Why graph symmetries matter?
8:25 Invariant and equivariant functions
12:30 Message passing GNN
16:02 The many flavors of MGNN
20:00 Separating power
22:51 2-Weisfeiler-Lehman test
26:59 How powerful are MGNN
28:27 Empirical results
29:10 Graphs as higher order tensors
31:45 Invariant and equivariant linear operator
35:47 Invariant linear GNN
38:18 Folklore GNN

Slides

Post

\ No newline at end of file diff --git a/modules/privacy-preserving-ML/index.html b/modules/privacy-preserving-ML/index.html index 5e56a43..a2b3480 100644 --- a/modules/privacy-preserving-ML/index.html +++ b/modules/privacy-preserving-ML/index.html @@ -1 +1 @@ - Dataflowr - Deep Learning DIY

Module - Privacy Preserving Machine Learning

by Daniel Huynh

Table of Contents

Privacy Preserving Machine Learning


0:00 Presentation
2:50 Context and cloud data threats
5:15 Confidential Computing (CC)
7:12 Intel SGX
8:40 Enclave
12:19 Azure Attestation Service
13:25 Use cases
14:50 Abstraction layers for enclaves
15:57 Open enclave SDK
16:27 Lightweight OS + Demo (Graphene SGX)
23:44 Multi-party machine learning
26:50 Q&A
33:26 Homomorphic Encryption (HE)
37:20 CKKS encoder
41:29 Homomorphic Encryption high-level view
42:24 Homomorphic Encryption in practice
45:17 Demo with TenSEAL
50:25 Demo Homomorphic Random Forests
1:01:38 to go beyond
1:02:28 Secure Multi-Party Computing (MPC)
1:07:58 Conclusion

Slides and code

to go beyond

\ No newline at end of file + Dataflowr - Deep Learning DIY

Module - Privacy Preserving Machine Learning

by Daniel Huynh

Table of Contents

Privacy Preserving Machine Learning


0:00 Presentation
2:50 Context and cloud data threats
5:15 Confidential Computing (CC)
7:12 Intel SGX
8:40 Enclave
12:19 Azure Attestation Service
13:25 Use cases
14:50 Abstraction layers for enclaves
15:57 Open enclave SDK
16:27 Lightweight OS + Demo (Graphene SGX)
23:44 Multi-party machine learning
26:50 Q&A
33:26 Homomorphic Encryption (HE)
37:20 CKKS encoder
41:29 Homomorphic Encryption high-level view
42:24 Homomorphic Encryption in practice
45:17 Demo with TenSEAL
50:25 Demo Homomorphic Random Forests
1:01:38 to go beyond
1:02:28 Secure Multi-Party Computing (MPC)
1:07:58 Conclusion

Slides and code

to go beyond

\ No newline at end of file diff --git a/notebooks_md/01_intro/index.html b/notebooks_md/01_intro/index.html index b1e1b3b..33bf371 100644 --- a/notebooks_md/01_intro/index.html +++ b/notebooks_md/01_intro/index.html @@ -1,4 +1,4 @@ - Dataflowr - Deep Learning DIY

Dataflowr

You are viewing the static version of the notebook, you can get the code (GitHub) or run it in colab

Module 1: Using CNN for dogs vs cats

To illustrate the Deep Learning pipeline seen in Module 1, we are going to use a pretrained model to enter the Dogs vs Cats competition at Kaggle.

Video timestamp

There are 25,000 labelled dog and cat photos available for training, and 12,500 in the test set that we have to try to label for this competition. According to the Kaggle website, when this competition was launched (end of 2013): "State of the art: The current literature suggests machine classifiers can score above 80% accuracy on this task". So if you can beat 80%, then you will be at the cutting edge as of 2013!

Imports

import numpy as np
+           Dataflowr - Deep Learning DIY  

Dataflowr

You are viewing the static version of the notebook, you can get the code (GitHub) or run it in colab

Module 1: Using CNN for dogs vs cats

To illustrate the Deep Learning pipeline seen in Module 1, we are going to use a pretrained model to enter the Dogs vs Cats competition at Kaggle.

Video timestamp

There are 25,000 labelled dog and cat photos available for training, and 12,500 in the test set that we have to try to label for this competition. According to the Kaggle website, when this competition was launched (end of 2013): "State of the art: The current literature suggests machine classifiers can score above 80% accuracy on this task". So if you can beat 80%, then you will be at the cutting edge as of 2013!

Imports

import numpy as np
 import matplotlib.pyplot as plt
 import os
 import torch
@@ -344,7 +344,7 @@ 

Conclusion

diff --git a/notebooks_md/02a_basics/index.html b/notebooks_md/02a_basics/index.html index f318d5f..12ef7d4 100644 --- a/notebooks_md/02a_basics/index.html +++ b/notebooks_md/02a_basics/index.html @@ -1,4 +1,4 @@ - Dataflowr - Deep Learning DIY

Dataflowr

You are viewing the static version of the notebook, you can get the code (GitHub) or run it in colab

You can also do the quizzes

Module 2: PyTorch tensors and automatic differentiation

Video timestamp

import matplotlib.pyplot as plt
+           Dataflowr - Deep Learning DIY  

Dataflowr

You are viewing the static version of the notebook, you can get the code (GitHub) or run it in colab

You can also do the quizzes

Module 2: PyTorch tensors and automatic differentiation

Video timestamp

import matplotlib.pyplot as plt
 %matplotlib inline
 import torch
 import numpy as np
torch.__version__
@@ -243,7 +243,7 @@

Edit this page on - Last modified: September 15, 2023. Website built with Franklin.jl and the Julia programming language. + Last modified: November 12, 2023. Website built with Franklin.jl and the Julia programming language.

diff --git a/package-lock.json b/package-lock.json index 21bb504..801b617 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,13 +5,13 @@ "packages": { "": { "dependencies": { - "highlight.js": "^11.8.0" + "highlight.js": "^11.9.0" } }, "node_modules/highlight.js": { - "version": "11.8.0", - "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.8.0.tgz", - "integrity": "sha512-MedQhoqVdr0U6SSnWPzfiadUcDHfN/Wzq25AkXiQv9oiOO/sG0S7XkvpFIqWBl9Yq1UYyYOOVORs5UW2XlPyzg==", + "version": "11.9.0", + "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.9.0.tgz", + "integrity": "sha512-fJ7cW7fQGCYAkgv4CPfwFHrfd/cLS4Hau96JuJ+ZTOWhjnhoeN1ub1tFmALm/+lW5z4WCAuAV9bm05AP0mS6Gw==", "engines": { "node": ">=12.0.0" } diff --git a/package.json b/package.json index e37b17c..c849f52 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { "dependencies": { - "highlight.js": "^11.8.0" + "highlight.js": "^11.9.0" } } diff --git a/sitemap.xml b/sitemap.xml index 5fe39e1..eb3cb74 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -3,277 +3,283 @@ https://dataflowr.github.io/website/modules/18a-diffusion/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/5-stacking-layers/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/14a-depth/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/graph0/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/graph1/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/notebooks_md/02a_basics/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/extras/jupyterlab/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/15-dropout/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/homework/1-mlp-from-scratch/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/homework/3-VAE/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/2b-automatic-differentiation/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/extras/graph_invariant/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/13-siamese/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/graph2/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/extras/invariant_equivariant/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/3-loss-functions-for-classification/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/extras/Convolutions_first/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/homework/2-CAM-adversarial/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/graph3/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/extras/GCN_inductivebias_spectral/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/12-intro-julia/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/notebooks_md/01_intro/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 
https://dataflowr.github.io/website/modules/8c-word2vec/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/9c-flows/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/1-intro-general-overview/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/privacy-preserving-ML/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/0-sotfware-installation/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/0-julia-setup/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/8b-collaborative-filtering/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/6-convolutional-neural-network/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/11a-recurrent-neural-networks-theory/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/4-optimization-for-deep-learning/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/10-generative-adversarial-networks/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/17-resnets/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/14b-depth/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/9b-unet/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/9a-autoencoders/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/2c-jax/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/11c-batches-with-sequences/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/16-batchnorm/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/8a-embedding-layers/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/7-dataloading/index.html - 2023-09-15 + 2023-11-12 + monthly + 0.5 + + + https://dataflowr.github.io/website/modules/19-clip/index.html + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/12-attention/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/2a-pytorch-tensors/index.html - 2023-09-15 + 2023-11-12 monthly 0.5 https://dataflowr.github.io/website/modules/11b-recurrent-neural-networks-practice/index.html - 2023-09-15 + 2023-11-12 monthly 0.5