diff --git a/.gitignore b/.gitignore
index e53cc9e..5338c50 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,15 +1,279 @@
+*.DS_Store
 
+## Core latex/pdflatex auxiliary files:
 *.aux
-*.bbl
-*.blg
+*.lof
 *.log
+*.lot
+*.fls
 *.out
-*.pdf
+*.toc
+*.fmt
+*.fot
+*.cb
+*.cb2
+.*.lb
+
+## Intermediate documents:
 *.dvi
-*.synctex.gz
-*.xwm
+*.xdv
+*-converted-to.*
+# these rules might exclude image files for figures etc.
+# *.ps
+# *.eps
+# *.pdf
+
+## Generated if empty string is given at "Please type another file name for output:"
+.pdf
+
+## Bibliography auxiliary files (bibtex/biblatex/biber):
+*.bbl
+*.bcf
+*.blg
+*-blx.aux
+*-blx.bib
+*.run.xml
+
+## Build tool auxiliary files:
 *.fdb_latexmk
-*.fls
-*.toc
+*.synctex
 *.synctex(busy)
-*.ps
+*.synctex.gz
+*.synctex.gz(busy)
+*.pdfsync
+
+## Build tool directories for auxiliary files
+# latexrun
+latex.out/
+
+## Auxiliary and intermediate files from other packages:
+# algorithms
+*.alg
+*.loa
+
+# achemso
+acs-*.bib
+
+# amsthm
+*.thm
+
+# beamer
+*.nav
+*.pre
+*.snm
+*.vrb
+
+# changes
+*.soc
+
+# comment
+*.cut
+
+# cprotect
+*.cpt
+
+# elsarticle (documentclass of Elsevier journals)
+*.spl
+
+# endnotes
+*.ent
+
+# fixme
+*.lox
+
+# feynmf/feynmp
+*.mf
+*.mp
+*.t[1-9]
+*.t[1-9][0-9]
+*.tfm
+
+#(r)(e)ledmac/(r)(e)ledpar
+*.end
+*.?end
+*.[1-9]
+*.[1-9][0-9]
+*.[1-9][0-9][0-9]
+*.[1-9]R
+*.[1-9][0-9]R
+*.[1-9][0-9][0-9]R
+*.eledsec[1-9]
+*.eledsec[1-9]R
+*.eledsec[1-9][0-9]
+*.eledsec[1-9][0-9]R
+*.eledsec[1-9][0-9][0-9]
+*.eledsec[1-9][0-9][0-9]R
+
+# glossaries
+*.acn
+*.acr
+*.glg
+*.glo
+*.gls
+*.glsdefs
+*.lzo
+*.lzs
+
+# uncomment this for glossaries-extra (will ignore makeindex's style files!)
+# *.ist
+
+# gnuplottex
+*-gnuplottex-*
+
+# gregoriotex
+*.gaux
+*.gtex
+
+# htlatex
+*.4ct
+*.4tc
+*.idv
+*.lg
+*.trc
+*.xref
+
+# hyperref
+*.brf
+
+# knitr
+*-concordance.tex
+# TODO Comment the next line if you want to keep your tikz graphics files
+*.tikz
+*-tikzDictionary
+
+# listings
+*.lol
+
+# luatexja-ruby
+*.ltjruby
+
+# makeidx
+*.idx
+*.ilg
+*.ind
+
+# minitoc
+*.maf
+*.mlf
+*.mlt
+*.mtc[0-9]*
+*.slf[0-9]*
+*.slt[0-9]*
+*.stc[0-9]*
+
+# minted
+_minted*
+*.pyg
+
+# morewrites
+*.mw
+
+# nomencl
+*.nlg
+*.nlo
+*.nls
+
+# pax
+*.pax
+
+# pdfpcnotes
+*.pdfpc
+
+# sagetex
+*.sagetex.sage
+*.sagetex.py
+*.sagetex.scmd
+
+# scrwfile
+*.wrt
+
+# sympy
+*.sout
+*.sympy
+sympy-plots-for-*.tex/
+
+# pdfcomment
+*.upa
+*.upb
+
+# pythontex
+*.pytxcode
+pythontex-files-*/
+
+# tcolorbox
+*.listing
+
+# thmtools
+*.loe
+
+# TikZ & PGF
+*.dpth
+*.md5
+*.auxlock
+
+# todonotes
+*.tdo
+
+# vhistory
+*.hst
+*.ver
+
+# easy-todo
+*.lod
+
+# xcolor
+*.xcp
+
+# xmpincl
+*.xmpi
+
+# xindy
+*.xdy
+
+# xypic precompiled matrices and outlines
+*.xyc
+*.xyd
+
+# endfloat
+*.ttt
+*.fff
+
+# Latexian
+TSWLatexianTemp*
+
+## Editors:
+# WinEdt
+*.bak
+*.sav
+
+# Texpad
+.texpadtmp
+
+# LyX
+*.lyx~
+
+# Kile
+*.backup
+
+# gummi
+.*.swp
+
+# KBibTeX
+*~[0-9]*
+
+# TeXnicCenter
+*.tps
+
+# auto folder when using emacs and auctex (a leading "./" never matches in .gitignore; "/" anchors to the repo root)
+/auto/*
+*.el
+
+# expex forward references with \gathertags
+*-tags.tex
+
+# standalone packages
+*.sta
+
+# Makeindex log files
+*.lpz
+main.pdf
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 9e713c2..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-sudo: required
-dist: xenial
-language: generic
-services:
-  - docker
-install:
-  - docker pull aergus/latex
-script:
-  - docker run --rm -it -v$(pwd):/MIDA2 -w /MIDA2 aergus/latex bash -c 'latexmk -pdf -f MIDA2.tex || true'
-branches:
-  except:
-    - /^travis-.*/
-
-before_deploy:
-  - git config --local user.name "Travis Builder"
-  - git config --local user.email "travis@travis.com"
-  - export TRAVIS_TAG=${TRAVIS_TAG:-travis-$(date +'%Y%m%d-%H%M%S')-$(git log --format=%h -1)}
-  - git tag $TRAVIS_TAG
-deploy:
-  provider: releases
-  api_key:
-    secure: "GugtkMpmZStvPDyY7Cn6KkRqM321PWxOoXKlm4TlkPIE1eGeLfyVFb9eag+r9LhNuuCxBnKFd8XGJU8w9cXfrh28NtKA/LLI1o3UrkSFY5vjjndXb0LeqTnLXj/l439DaeNBZ0PVF+fwMb6grFxwg0u4B16d7ggph3vwLRRmEk9XMWCQsnjkeQ9J3ehBkn/KaZ7pOrAe6yolJ49+EzRulSoM35+hJSFijSC4sBOzu5f66boXEbZGCbEzEttcZy4EfFfnpneclEhZeHLWj6hZeAnofvDAS5/BIT9+JSAdqu4gD8pk6vP86kVOEw4LA4LR9BXUpXU6Uv2QIkmArHQPvYcBlesaqFUMuOqst8gCCNd6zrvxbbxMDNc3j+W+9PNMpuVFhNX82vx/d1AL3TWgjjsIqNZTZxvGeC4w1apxbFLdzA9zi3pINaPaBJlzvUBpxBP92U57B+/ftyiSfQ8rsxSuNCKL0t9GwZiD7hJ25x79zqy/2MfauTJipwJQtd6QnJUPX7ROwiYIzn/R+7ALS58rcGNGhxQgIOnzb6PjV3YaJyR9PyAqH6V10SX8BOHigQZbKk1U7APPqIXdhBq/DM3DlgL7bvWZgKXyQgby6PtYE7PdX1rXcMorPraEP6bGDQBAle8FhI6s8P8sLOZYqlUC58phgLZbI6n3ojm37/8="
-  file: MIDA2.pdf
-  skip_cleanup: true
diff --git a/LICENSE b/LICENSE-CC-BY-NC-SA
similarity index 100%
rename from LICENSE
rename to LICENSE-CC-BY-NC-SA
diff --git a/MIDA2.tex b/MIDA2.tex
deleted file mode 100644
index 4250395..0000000
--- a/MIDA2.tex
+++ /dev/null
@@ -1,45 +0,0 @@
-\documentclass{easyclass}
-
-\usepackage{todonotes}
-\usepackage{mathtools}
-\usepackage{cancel}
-\usepackage{xfrac}
-\usepackage{float}
-\usepackage{caption}
-
-\begin{document}
-\begin{titlepage}
-    \university{Politicnico di Milano}
-    \courseid{051589 -- MIDA2}
-    \title{Model Identification and Data Analysis \\ Part 2}
-    \author{\textsc{Edoardo Morassutto}}
-    \contributors{\textsc{Marco Donadoni}\\\textsc{Cosimo Russo}\\\textsc{Federico Cazzola}}
-    \version{A.Y. 2019 -- 2020}
-    \instructor{Prof. \textsc{Sergio Savaresi}\\
-    Ing. \textsc{Stefano Dattilo}}
-    \maketitle
-\end{titlepage}
-
-\tableofcontents
-
-\input{lectures/2020-04-16.tex}
-\input{lectures/2020-04-20.tex}
-\input{lectures/2020-04-21.tex}
-\input{lectures/2020-04-22.tex}
-\input{lectures/2020-04-23.tex}
-\input{lectures/2020-04-27.tex}
-\input{lectures/2020-04-30.tex}
-\input{lectures/2020-05-04.tex}
-\input{lectures/2020-05-05.tex}
-\input{lectures/2020-05-07.tex}
-\input{lectures/2020-05-11.tex}
-\input{lectures/2020-05-12.tex}
-\input{lectures/2020-05-14.tex}
-\input{lectures/2020-05-18.tex}
-\input{lectures/2020-05-19.tex}
-\input{lectures/2020-05-25.tex}
-\input{lectures/2020-05-27.tex}
-\input{lectures/2020-06-03.tex}
-\input{lectures/2020-06-04.tex}
-
-\end{document}
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..1cfb8ac
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,29 @@
+# Builds the lecture notes PDF with latexmk.
+#   make / make pdf  build $(PRINCIPALE_PDF)
+#   make clean       remove the LaTeX auxiliary files
+#   make distclean   additionally remove the generated PDF
+PRINCIPALE = main
+PRINCIPALE_TEX = $(PRINCIPALE).tex
+PRINCIPALE_PDF = $(PRINCIPALE).pdf
+# Every source that should trigger a rebuild, including the files in the
+# sub-directories (a bare *.tex prerequisite only sees the top-level directory).
+SORGENTI = $(wildcard *.tex firstpages/*.tex lectures/*.tex)
+FILE_CLEAN = *.aux *.log *.out *.xdv *.toc *.fls *.fdb_latexmk *.synctex.gz *.synctex\(busy\)
+FILE_DISTCLEAN = $(PRINCIPALE_PDF)
+
+.PHONY: distclean clean pdf
+
+pdf: $(PRINCIPALE_PDF)
+
+# commit_hash.part is picked up by firstpages/copyright.tex to print the version;
+# it only needs to exist while latexmk runs.
+$(PRINCIPALE_PDF): $(PRINCIPALE_TEX) $(SORGENTI)
+	git show -s --format=%H > commit_hash.part
+	latexmk -pdf -jobname=$(PRINCIPALE) $(PRINCIPALE_TEX)
+	rm -f commit_hash.part
+
+clean:
+	rm -f $(FILE_CLEAN) commit_hash.part
+
+distclean : clean
+	rm -f $(FILE_DISTCLEAN)
\ No newline at end of file
diff --git a/README.md b/README.md
index bd92a45..6f7902b 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,34 @@
- # MIDA2
+[![CC BY-NC-SA 4.0][cc-by-nc-sa-shield]][cc-by-nc-sa]
+
+> **WHAT ABOUT MIDA1?** Here you can find the [MIDA1 repository](https://github.com/teobucci/mida).
+
+# Model Identification and Data Analysis - Part 2
+
+Lecture notes of the [Model Identification and Data Analysis - Part 2 (051589)](https://www11.ceda.polimi.it/schedaincarico/schedaincarico/controller/scheda_pubblica/SchedaPublic.do?&evn_default=evento&c_classe=765794&polij_device_category=DESKTOP&__pj0=0&__pj1=42d68cc1050be0569561dabe2a5df40b) course at Politecnico di Milano, A.Y. 2021-2022.
+
+## Project structure
+
+The main file to be compiled is `main.tex`.
+The other source files are included with `\input{file}` and shouldn't be compiled directly. However, each source file starts with the command `%!TEX root = ../main.tex`, so the project can also be built by compiling any source `.tex` file.
+
+## Updates and Errors
+
+These notes are the result of reviewing and updating class notes taken during A.Y. 2019-2020 and may contain errors and inaccuracies. Moreover, no lecturer has reviewed the contents of these files. Therefore, use them at will, but responsibly.
+If you want to correct or modify the document, you can open a Pull Request or contact the authors via email.
+
+## Authors
+
+- Edoardo Morassutto ([@edomora97](https://github.com/edomora97))<br>edoardo.morassutto@gmail.com
+- Andrea Bosisio ([@andreabosisio](https://github.com/andreabosisio))<br>andrea2.bosisio@mail.polimi.it
+
+### Other credits
+
+The style of this LaTeX document is taken from the [MIDA1 repository](https://github.com/teobucci/mida).
+
+## License
+
+This work is licensed under [Creative Commons BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/). In particular, without the authors' permission, it is forbidden to make digital or printed copies to sell them.
+
+[cc-by-nc-sa-shield]: https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg
+[cc-by-nc-sa]: http://creativecommons.org/licenses/by-nc-sa/4.0/
 
- Class notes of the Model Identification and Data Analysis - Part 2 course at Politecnico di Milano, a.y. 2019-2020.
diff --git a/easyclass.cls b/easyclass.cls
deleted file mode 100644
index 8a0e911..0000000
--- a/easyclass.cls
+++ /dev/null
@@ -1,315 +0,0 @@
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% easyclass.cls - LaTeX2e class for Lecture Note use.
-%% To Make Lecture Note for Computer Science Courses
-%%	Version 1.00
-%%  written by Naijia Fan, 2019
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\NeedsTeXFormat{LaTeX2e}
-\ProvidesClass{easyclass}[2/6/2019 custom lecture note class]
-\LoadClass[twoside=false]{scrbook}
-\RequirePackage{mathtools,amssymb,bm,bbold,enumerate}
-\RequirePackage[dvipsnames]{xcolor}
-\RequirePackage{hyperref,cleveref}
-
-\newcommand\myshade{90}
-\colorlet{mylinkcolor}{NavyBlue}
-\colorlet{mycitecolor}{Aquamarine}
-\colorlet{myurlcolor}{Aquamarine}
-
-\hypersetup{
-  linkcolor  = mylinkcolor!\myshade!black,
-  citecolor  = mycitecolor!\myshade!black,
-  urlcolor   = myurlcolor!\myshade!black,
-  colorlinks = true,
-}
-
-%--------------------------------------------------------------------
-% Bibliography
-\RequirePackage[]{natbib}
-\bibliographystyle{chicago}
-
-%--------------------------------------------------------------------
-% Theorem
-%=================================
-% pre-defined theorem environments
-% custom theorem boxes
-\RequirePackage[framemethod=TikZ]{mdframed}
-\RequirePackage{amsthm}
-\newtheorem{theorem}{Theorem}[chapter]
-\newtheorem{lemma}[theorem]{Lemma}
-\newtheorem{proposition}{Proposition}
-\newtheorem{corollary}{Corollary}
-\newtheorem{definition}{Definition}
-\newtheorem*{assumption}{Assumption}
-
-%=================================
-% generic definition of the colored boxes
-\newcommand{\defineblock}[3]{%
-  \newenvironment{#1}[1][]{%
-    \ifstrempty{##1}%
-    {%
-      \mdfsetup{%
-        frametitle={%
-          \tikz[baseline=(current bounding box.east),outer sep=0pt]%
-          \node[anchor=east,rectangle,fill=#3]%
-          {\strut #2};%
-        }%
-      }%
-    }%
-    {%
-      \mdfsetup{%
-        frametitle={%
-          \tikz[baseline=(current bounding box.east),outer sep=0pt]%
-          \node[anchor=east,rectangle,fill=#3]%
-          {\strut #2:~##1};%
-        }%
-      }%
-    }%
-    \mdfsetup{%
-      innertopmargin=10pt,%
-      linecolor=#3,%
-      linewidth=2pt,%
-      topline=true,%
-      frametitleaboveskip=\dimexpr-\ht\strutbox\relax%
-    }%
-    \begin{mdframed}[]\relax%
-  }%
-  {%
-    \end{mdframed}%
-  }%
-}
-
-\defineblock{exercise}{Exercise}{red!40}
-\defineblock{remark}{Remark}{purple!20}
-\defineblock{example}{Example}{orange!20}
-\defineblock{recall}{Recall}{orange!20}
-
-%=================================
-% useful commands
-\DeclareMathOperator*{\argmin}{arg\,min}
-\DeclareMathOperator*{\argmax}{arg\,max}
-\DeclareMathOperator*{\supp}{supp}
-\DeclareMathOperator*{\rank}{rank}
-
-\def\vec#1{{\ensuremath{\bm{{#1}}}}}
-\def\mat#1{\vec{#1}}
-\def\bvec#1{\mathbf{#1}}
-
-%=================================
-% convenient notations
-\newcommand{\XX}{\mathbb{X}}
-\newcommand{\RR}{\mathbb{R}}
-\newcommand{\EE}{\mathbb{E}}
-\newcommand{\PP}{\mathbb{P}}
-
-\newcommand{\sL}{\mathcal{L}}
-\newcommand{\sX}{\mathcal{X}}
-\newcommand{\sY}{\mathcal{Y}}
-
-\newcommand{\ind}{\mathbb{1}}
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% Typography
-\RequirePackage[T1]{fontenc}
-
-% Disable paragraph indentation, and increase gap
-\RequirePackage{parskip}
-
-%=================================
-% header and footer
-\RequirePackage{scrlayer-scrpage}
-\pagestyle{scrheadings}
-\deftripstyle{pagestyle}
-%   [0.5pt]
-%   [0.3pt]
-  {}
-  {}
-  {\headmark}
-  {}
-  {\pagemark}
-  {}
-
-\pagestyle{pagestyle}
-\renewcommand{\chapterpagestyle}{pagestyle}
-
-
-%=================================
-% draw pictures
-\RequirePackage{tikz}
-\RequirePackage{pgfplots}
-\usetikzlibrary{arrows,arrows.meta,decorations.pathreplacing,decorations.pathmorphing,matrix,positioning,calc,shapes,patterns}
-\tikzstyle{labeled}=[execute at begin node=$\scriptstyle,
-   execute at end node=$]
-\RequirePackage{graphicx}
-\pgfmathsetseed{42}
-% Cross
-\tikzset{cross/.style={cross out, draw=black, minimum size=2*(#1-\pgflinewidth), inner sep=0pt, outer sep=0pt},
-  %default radius will be 1pt.
-  cross/.default={0.1cm}}
-% tikz styles for the block diagrams
-\tikzstyle{int}  = [draw, fill=blue!20, minimum size=2em]
-\tikzstyle{init} = [pin edge={to-,thin,black}]
-\tikzstyle{sum}  = [draw, fill=blue!20, circle, node distance=1cm]
-% pattern
-\tikzset{
-  hatch distance/.store in=\hatchdistance,
-  hatch distance=10pt,
-  hatch thickness/.store in=\hatchthickness,
-  hatch thickness=0.2pt
-}
-\pgfdeclarepatternformonly[\hatchdistance,\hatchthickness]{flexible hatch}
-{\pgfqpoint{0pt}{0pt}}
-{\pgfqpoint{\hatchdistance}{\hatchdistance}}
-{\pgfpoint{\hatchdistance-1pt}{\hatchdistance-1pt}}%
-{
-  \pgfsetcolor{\tikz@pattern@color}
-  \pgfsetlinewidth{\hatchthickness}
-  \pgfpathmoveto{\pgfqpoint{0pt}{0pt}}
-  \pgfpathlineto{\pgfqpoint{\hatchdistance}{\hatchdistance}}
-  \pgfusepath{stroke}
-}
-% double border
-\tikzstyle{double border} = [double, double distance=0.5mm]
-% pattern in border
-\newcounter{tmp}
-\newif\ifpathisclosed
-\tikzset{dashed border/.style={
-    preaction={decoration={contour lineto closed, contour distance=2pt},
-      decorate,
-    },
-    postaction={
-      insert path={%
-        \pgfextra{%
-          \pgfinterruptpath
-          \path[pattern=north west lines, pattern color=black,even odd rule]
-          \mySecondList \myList
-          ;
-        \endpgfinterruptpath}
-    }},
-}}
-\def\pgfdecoratedcontourdistance{0pt}
-\pgfset{
-  decoration/contour distance/.code=%
-\pgfmathsetlengthmacro\pgfdecoratedcontourdistance{#1}}
-\pgfdeclaredecoration{contour lineto closed}{start}{%
-  \state{start}[
-  next state=draw,
-  width=0pt,
-  persistent precomputation=\let\pgf@decorate@firstsegmentangle\pgfdecoratedangle]{%
-    %\xdef\myList{}\xdef\mySecondList{}%
-    \setcounter{tmp}{0}%
-    \global\pathisclosedfalse%
-    \pgfpathmoveto{\pgfpointlineattime{.5}
-      {\pgfqpoint{0pt}{\pgfdecoratedcontourdistance}}
-    {\pgfqpoint{\pgfdecoratedinputsegmentlength}{\pgfdecoratedcontourdistance}}}%
-  }%
-  \state{draw}[next state=draw, width=\pgfdecoratedinputsegmentlength]{%
-    \ifpgf@decorate@is@closepath@%
-      \pgfmathsetmacro\pgfdecoratedangletonextinputsegment{%
-      -\pgfdecoratedangle+\pgf@decorate@firstsegmentangle}%
-    \fi
-    \pgfmathsetlengthmacro\pgf@decoration@contour@shorten{%
-    -\pgfdecoratedcontourdistance*cot(-\pgfdecoratedangletonextinputsegment/2+90)}%
-    \pgfpathlineto
-    {\pgfpoint{\pgfdecoratedinputsegmentlength+\pgf@decoration@contour@shorten}
-    {\pgfdecoratedcontourdistance}}%
-    \stepcounter{tmp}%
-    \pgfcoordinate{muemmel\number\value{tmp}}{\pgfpoint{\pgfdecoratedinputsegmentlength+\pgf@decoration@contour@shorten}
-    {\pgfdecoratedcontourdistance}}%
-    \pgfcoordinate{feep\number\value{tmp}}{\pgfpoint{\pgfdecoratedinputsegmentlength}{0pt}}%
-    \ifnum\value{tmp}=1\relax%
-     \pgfcoordinate{muemmel0}{\pgfpoint{0pt}{\pgfdecoratedcontourdistance}}%
-     \pgfcoordinate{feep0}{\pgfpoint{0pt}{0pt}}%
-     \xdef\myList{(muemmel\number\value{tmp})}%
-     \xdef\mySecondList{(feep\number\value{tmp})}%
-    \else
-     \xdef\myList{\myList -- (muemmel\number\value{tmp})}%
-     \xdef\mySecondList{(feep\number\value{tmp}) -- \mySecondList}%
-    \fi
-    \ifpgf@decorate@is@closepath@%
-      \pgfpathclose
-      \global\pathisclosedtrue%
-    \fi
-  }%
-  \state{final}{%\typeout{\myList,\mySecondList}%
-    \ifpathisclosed%
-      \xdef\myList{\myList -- cycle}%
-      \xdef\mySecondList{\mySecondList -- cycle}%
-      %\typeout{closed \mySecondList \myList }
-    \else
-      %\typeout{\number\value{tmp}}%
-      \xdef\myList{(muemmel0) -- \myList -- cycle}%
-      \xdef\mySecondList{\mySecondList -- (feep0) --}%
-      %\typeout{not closed \mySecondList \myList }%
-    \fi
-  }%
-}
-\tikzset{
-  contour/.style={
-    decoration={
-      name=contour lineto closed,
-      contour distance=#1
-    },
-decorate}}
-
-
-%=================================
-% title page
-% define university
-\def\@university{no university}
-\newcommand{\university}[1]{
-  \def\@university{#1}
-}
-% define course id
-\def\@courseid{CS 000}
-\newcommand{\courseid}[1]{
-  \def\@courseid{#1}
-}
-% define version
-\def\@version{Great Year}
-\newcommand{\version}[1]{
-  \def\@version{#1}
-}
-% define instructor info
-\def\@instructor{Instructor}
-\newcommand{\instructor}[1]{
-  \def\@instructor{#1}
-}
-% define contributors info
-\def\@contributors{Contributors}
-\newcommand{\contributors}[1]{
-  \def\@contributors{#1}
-}
-% make title
-\renewcommand{\maketitle}{
-    \centering
-    {\scshape{\Large  \@university \par \@courseid} \par}
-    \vspace{1.5cm}
-    {\huge\bfseries{\@title} \par}
-    \vspace{2cm}
-    {{\Large Instructors}\par \@instructor}
-    \vspace{2cm}
-    % \includegraphics[width=0.25\textwidth]{figures/funny.png}
-    \vfill
-    % Bottom of the page
-    {{\Large Author}\par \@author\par}
-    \vspace*{0.5cm}
-    {{\Large Contributors}\par \@contributors\par}
-    \vspace*{0.5cm}
-    {\large \@version \par}
-}
-
-%=================================
-% add current instructor info
-% on the right
-\RequirePackage{marginnote}
-\newcommand{\newlecture}[2]{%
-  % \marginpar{#1\\#2}%
-  \marginpar{\noindent\fbox{%
-    \parbox{1.5cm}{%
-      \footnotesize
-      #1\\#2
-    }%
-  }}
-}
diff --git a/firstpages/copyright.tex b/firstpages/copyright.tex
new file mode 100644
index 0000000..ed13aa6
--- /dev/null
+++ b/firstpages/copyright.tex
@@ -0,0 +1,29 @@
+%!TEX root = ../main.tex
+
+\vspace*{\stretch{12}}
+
+\textcopyright \ The authors. Some rights reserved.
+
+This work is licensed under CC BY-NC-SA 4.0.\\
+\url{http://creativecommons.org/licenses/by-nc-sa/4.0/}
+
+In particular, without the authors' permission, it is forbidden to make digital or printed copies to sell them.
+
+The \latex source code is available at\\
+\url{https://github.com/polimi-cheatsheet/MIDA2}
+
+\vspace*{\stretch{2}}
+
+\textsc{Document created on \today}
+\IfFileExists{./commit_hash.part}{\\\textsc{Version} \texttt{\input{commit_hash.part}}}{}
+
+\vspace*{\stretch{2}}
+
+\textsc{Developed by:}\\
+\textsc{Edoardo Morassutto}\\
+\textsc{Marco Donadoni}\\
+\textsc{Cosimo Russo}\\
+\textsc{Federico Cazzola}\\
+\textsc{Andrea Bosisio} - \texttt{andrea2.bosisio@mail.polimi.it}
+
+\vspace*{\stretch{5}}
diff --git a/firstpages/preface.tex b/firstpages/preface.tex
new file mode 100644
index 0000000..59d81a2
--- /dev/null
+++ b/firstpages/preface.tex
@@ -0,0 +1 @@
+%!TEX root = ../main.tex
diff --git a/img/README.md b/img/README.md
new file mode 100644
index 0000000..37bb172
--- /dev/null
+++ b/img/README.md
@@ -0,0 +1,2 @@
+TODO: delete this folder once this single image has been replaced by a TikZ figure.
+
diff --git a/img/freq-emphasis.png b/img/freq-emphasis.png
new file mode 100644
index 0000000..1bd6663
Binary files /dev/null and b/img/freq-emphasis.png differ
diff --git a/lectures/2022_04_06.tex b/lectures/2022_04_06.tex
new file mode 100644
index 0000000..75f0edb
--- /dev/null
+++ b/lectures/2022_04_06.tex
@@ -0,0 +1,346 @@
+%!TEX root = ../main.tex
+\setcounter{chapter}{-1}
+\chapter{Introduction}
+
+\section{Prerequisites}
+The \emph{Model Identification and Data Analysis - Part 2}\footnote{For more information about the MIDA course see \href{https://www4.ceda.polimi.it/manifesti/manifesti/controller/ManifestoPublic.do?EVN_DETTAGLIO_RIGA_MANIFESTO=evento&k_corso_la=481&k_indir=T2A&idItemOfferta=156912&idGruppo=4332&idRiga=271034&codDescr=051587&semestre=2&aa=2021&lang=EN&jaf_currentWFID=main}{here}.} course is a graduate level course of the MSc in Computer Science and Engineering held at Politecnico di Milano; hence, familiarity with basic concepts of computer science (algorithms and complexity), dynamical systems theory and a
+mathematical maturity in linear algebra and probability theory are prerequisites. 
+
+Furthermore, in order to understand the concepts of this course it is recommended to first follow the \emph{Model Identification and Data Analysis - Part 1}\footnote{MIDA1 lecture notes available \href{https://github.com/teobucci/mida/releases}{here}.} course.
+
+\section{General topics of MIDA course}
+
+\begin{itemize}
+    \item Collect data digitally from real systems
+    \item Build \acrfull{bb} (or \acrfull{gb}) models from data, with emphasis on
+    \begin{itemize}
+        \item Dynamic systems
+        \item Control/automation-oriented applications
+    \end{itemize}
+    \item Purpose of modelling (area of machine learning focusing on ``control'')
+    \begin{itemize}
+        \item Prediction
+        \item Software-sensing
+        \item Modelling for control design
+    \end{itemize}
+\end{itemize}
+
+\subsection{Super summary of MIDA1}
+The focus was on \emph{Time Series} (output-only systems) and \emph{input/output} (I/O) systems.
+
+Models used in MIDA1:
+\begin{itemize}
+    \item \gls{arma} models for time series
+    \item \gls{armax} models for I/O systems
+\end{itemize}
+
+\begin{figure}[H]
+    \begin{minipage}[t]{0.4\textwidth}
+	\centering
+	\begin{tikzpicture}
+
+		\node (input) at (0,0) {};
+		\draw[-] (input.east) -- (1,0)
+		    node[midway,above] {$e(t)$}
+		    node[at end] (inputR) {};
+		
+		% blocks
+		\node[block, right=0cm of inputR] (w1) {$\frac{C(z)}{A(z)}$};
+		
+		% connect block with input
+		\draw[-stealth] (inputR.center)|-(w1.west);
+		
+		\draw[-stealth] (w1.east) -- ++(1,0)
+		    node[midway, above] {$y(t)$}
+		;
+	\end{tikzpicture}
+        \caption*{ARMA model}
+    \end{minipage}
+    \begin{minipage}[t]{0.4\textwidth}
+        \centering
+	\begin{tikzpicture}
+		% place nodes
+		\node [sum] (sum) at (0,0){};
+		\node [block, left=1.5cm of sum] (wu) {$z^{-k}\frac{B(z)}{A(z)}$};
+		\node [block,above left=0.7cm and 0.5cm of sum] (we) {$\frac{C(z)}{A(z)}$};
+
+		% connect nodes
+		\draw[stealth-] (wu.west) -- ++(-1,0) node[midway, above]{$u(t)$};
+		\draw[stealth-] (we.west) -- ++(-1,0) node[midway, above]{$e(t)$};
+
+		\draw[-stealth] (wu.east) -- (sum.west)
+			node[midway, above] {$y_{1}(t)$}
+			node[very near end, below] {$+$};
+		\draw[-stealth] (we.east) -| (sum.north)
+			node[midway, above] {$y_{2}(t)$}
+			node[very near end, right] {$+$};
+
+		\draw[-stealth] (sum.east) -- ++(1,0) node[midway, above] {$y(t)$};
+	\end{tikzpicture}
+        \caption*{\gls{armax} model}
+    \end{minipage}
+\end{figure}
+We've used a \acrlong{bb} model identification method to find the model of the real system. The model is indicated as $\Mc(\theta)$ where $\theta$ is the parameter vector, which contains the coefficients of $A(z)$, $B(z)$, $C(z)$.
+
+In particular, we've seen the \textbf{\acrfull{pem}} method, a \emph{parametric approach} based on the minimization of the \emph{performance index} defined as:
+\begin{defn}[\gls{pem} cost function]
+    $J_{N}(\theta) = \frac{1}{N} \sum_{t=1}^N \left(y(t) - \hat{y}(t|t-1, \theta)\right)^2$
+\end{defn}
+
+which is the sample variance of the \emph{prediction error} $\epsilon(t)$ made by the model. The optimal $\theta$ is $\hat{\theta}_N = \argmin_\theta J_{N}(\theta)$.
+
+\subsection{MIDA 2}
+
+The focus is on \emph{Dynamical Systems} with a special eye on control (or feedback) applications (closer to real applications than time series). We can divide the course into three main blocks: \emph{Advanced Model Identification} methods, \emph{Software-Sensing} (SW-sensing) and \emph{Minimum Variance Control}.\\
+In particular we will see: 
+
+\begin{itemize}
+    \item Non-parametric (direct/constructive) \gls{bb} identification of I/O systems using \acrlong{ss} models
+    \item Parametric identification for \gls{bb} I/O systems, with a frequency-domain approach
+    \item Kalman-filter for SW-sensing using feedback on \gls{wb} models
+    \item \gls{bb} methods for SW-sensing without feedback
+    \item \gls{gb} system identification using Kalman-filter and using \emph{simulation-error methods} (SEM)
+    \item  Minimum-Variance Control (MVC), design of optimal feedback controllers using the theory background of the MIDA course
+    \item Recursive (online) implementation of algorithms for system identification
+\end{itemize}
+
+\section{Motivating example for the course: ABS (Anti-Lock Braking System)} \label{abs_ex}
+ABS is an example of a control system since it follows this general scheme:
+\begin{figure}[H]
+    \centering
+    	\begin{tikzpicture}
+		% place nodes
+		\node [block] (ctrl) at (0,0) {$\text{Control Algorithm}$};
+		\node [block, right=2cm of ctrl] (system) {$\text{System}$};
+
+		% connect nodes
+		\draw [stealth-] (ctrl.west) -- ++(-2,0) node[midway,above] {$\bar{y}(t)$};
+		\draw [-stealth] (ctrl.east) -- (system.west) node[midway,above] {$u(t)$};
+		\draw [-stealth] (system.east) -- ++(2,0)  
+			node[midway,above] {$y(t)$}
+			node[midway] (yt) {}; ;
+		\draw [-] (yt.center) --+(0,-1)
+			node[at end] (fb) {};
+		\draw[-stealth] (fb.center)-|(ctrl.south);
+	\end{tikzpicture}
+	\caption*{Control System Scheme}
+\end{figure}
+    
+where $\bar{y}(t)$ is the reference value of $y(t)$.\\
+
+We define the \emph{slip} of the wheel as $\lambda(t) = \frac{v(t)-\omega(t) r}{v(t)}$, where $v(t)$ is the horizontal velocity of the car, $\omega(t)$ is the angular velocity of the wheel and $r$ is the radius of the wheel.\\
+
+During braking $0 \le \lambda (t) \le 1$ (from free rolling wheel (i.e. $v(t) = \omega (t) r$) to locked wheel (i.e. $\omega(t) = 0$)).\\
+The curve of $F_{x}$, the braking force, is as shown below:
+\vspace{-3cm}
+\begin{figure}[h!]
+    \centering
+    \resizebox{8cm}{!}{%
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex',scale=1.5]
+        \draw[->] (-0.5,0) -- (2.5,0) node[right] {$\lambda$};
+        \draw[->] (0,-0.5) -- (0,2.3) node[left] {$F_x$};
+        \begin{scope}[scale=2]
+            \draw[dotted] (0.23, 1) -- (0.23, 0) node[below] {$\bar{\lambda}$};
+            \draw[dotted] (1, 0.75) -- (1, 0) node[below] {$1$};
+            \node[red,rotate=-23] at (0.65,1) {unstable};
+            \node[green!60!black,rotate=77] at (0.13,0.5) {stable};
+            \begin{scope}
+                \clip (0,0) rectangle (0.23, 2);
+                \draw [green!60!black,line width=0.4mm] plot [smooth] coordinates {(0,0) (0.2, 1) (1, 0.75)};
+            \end{scope}
+            \begin{scope}
+                \clip (0.23, 0) rectangle (1, 2);
+                \draw [red,line width=0.4mm] plot [smooth] coordinates {(0,0) (0.2, 1) (1, 0.75)};
+            \end{scope}
+        \end{scope}
+    \end{tikzpicture}
+    }
+    \caption*{Relation between $\lambda$ and the braking force.}
+\end{figure}
+
+
+In the case of ABS, $u(t) = x(t)$ which is the voltage of the electric braking motor (control variable), $y(t) = \lambda (t)$ (controlled variable) and $\bar{y}(t) = \bar{\lambda} (t)$, which is the maximum braking point that we want to reach during an emergency brake.
+
+The problem can be divided into subproblems:
+\begin{itemize}
+    \item Model of the system
+    \item SW-estimation of $\lambda$ since $v$ is \textbf{not} directly measurable, so $\lambda$ cannot be computed
+    \item Design of the ABS control algorithm
+\end{itemize}
+
+Because of that measurement problem we have to build a \acrlong{bb} model from data.\\
+
+Why \acrlong{bb} modelling?
+The control variable $x$ (the voltage to the actuator) controls a complex system from the actuator to $\lambda$.
+The system can be seen as a chain of components:
+\begin{itemize}
+    \item Current dynamics and electric motor
+    \item Position dynamics of the actuator
+    \item Dynamics of the hydraulic circuit of the braking system
+    \item Tire dynamics
+    \item Wheel rotational dynamics
+    \item Vehicle full dynamics
+\end{itemize}
+
+It is easy to see that such a system is really difficult to model.
+
+\begin{rem}[\acrfull{wb} vs. \acrfull{bb}]
+    \hfill \break
+    \begin{itemize}
+	   \item \gls{wb} modelling: write the physical equations from \emph{first principles}.
+	   \item \gls{bb} modelling: experiment $\rightarrow$ collect data $\rightarrow$ build model.
+        Using only I/O measured data we can \emph{learn} a mathematical model of the I/O behavior of the system.
+    \end{itemize}
+\end{rem}
+
+In order to estimate the variable $v(t)$ (and so also $\lambda(t)$) we use an SW-sensing algorithm which takes as inputs other measurable variables (e.g. angular velocities of all the wheels). \\
+Thus, the scheme of a general control system becomes something like this: 
+
+\begin{figure}[H]
+    \centering
+    	\begin{tikzpicture}
+		% place nodes
+		\node [block] (ctrl) at (0,0) {$\text{Control Algorithm}$};
+		\node [block, right=2cm of ctrl] (system) {$\text{System}$};
+		\node [block, below=1.5cm of ctrl] (sw) {$\text{SW-sensing Algorithm}$};
+
+		% connect nodes
+		\draw [stealth-] (ctrl.west) -- ++(-2,0) node[midway,above] {$\bar{y}(t)$};
+		\draw [-stealth] (ctrl.east) -- (system.west) node[midway,above] {$u(t)$};
+		\draw [-stealth] (system.east) -- ++(2,0)  node[midway,above] {$y(t)$};
+		\draw [-stealth] (system.south) |- (sw.east)  node[midway, right] {$\Phi(t)$};
+		\draw[-stealth] (sw.north)-|(ctrl.south) node[near end, right] {$\hat{y}(t)$};
+	\end{tikzpicture}
+    \vspace{5pt}
+	\caption*{Control System Scheme with SW-sensing}
+\end{figure}
+
+where $\Phi(t)$ are the available (measurable) variables of the system and $\hat{y}(t)$ is the estimate of $y(t)$ computed by the SW-sensing algorithm.
+
+\chapter{\acrlong{bb} non-parametric identification of I/O systems using \acrlong{ss} models in time domain}
+
+\vspace{-12pt}
+\begin{figure}[H]
+    \centering
+    	\begin{tikzpicture}
+    		% place nodes
+		\node [block] (sys) at (0,0) {System};
+	
+		% connect nodes
+		\draw [stealth-] (sys.west) -- ++(-1,0) node[midway,below] {$u(t)$};
+		\draw [-stealth] (sys.east) -- ++(1,0) node[midway,below] {$y(t)$};
+         	\draw[dashed, stealth-] (sys.north) -- ++(0,1) node[midway,right] {$d(t)$ (not measured disturbance)};
+    \end{tikzpicture}
+\end{figure}
+
+
+\begin{rem}[General path of a Parametric Identification Method]
+\hfill \break
+    \begin{enumerate}
+        \item Collect data: inputs: $\left\{u(1), u(2), \ldots, u(N)\right\}$, outputs: $\left\{y(1), y(2), \ldots, y(N)\right\}$
+        \item Select \textbf{a-priori} a class/family of \textbf{parametric models}: $\Mc(\theta)$
+        \item Select \textbf{a-priori} a performance index $J(\theta)$ (which gives an order to the quality of the models)
+        \item Optimization step (minimize $J(\theta)$ w.r.t $\theta$): $\hat{\theta}_N = \argmin_\theta J(\theta)$ $\rightarrow$ optimal model $\Mc(\hat{\theta}_N)$ characterized by the \textbf{optimal parameters} $\hat{\theta}_N$
+    \end{enumerate}
+    
+    \textbf{Note}: $J(\theta): \RR^{n_\theta} \rightarrow \RR^+$ (where $n_\theta$ is the number of parameters, i.e. the dimension of $\theta$).
+    
+    \textbf{Note}: $\Mc(\theta)$ can be sorted, that is $\Mc(\theta_1)$ is better than $\Mc(\theta_2)$ if $J(\theta_1) < J(\theta_2)$.
+
+\end{rem}
+
+In this chapter we are presenting a totally different system identification approach, the \textbf{non-parametric} one, which means:
+\begin{itemize}
+    \item No a-priori model-class selection
+    \item No performance index definition
+    \item No optimization task
+\end{itemize}
+
+Before going into this system identification algorithm we need to recall the three main mathematical representations of \textbf{Discrete-time Dynamic Linear Systems}, which are characterized by their internal variables called \emph{states} and indicated with $x(t)$.
+
+
+\section{Representations}
+
+\subsection{Representation \#1: \acrfull{ss}}
+
+\[
+\Sc: 
+\begin{cases}
+    x(t+1) = F x(t) + G u(t) & \qquad \text{state equations} \\
+    y(t) = H x(t) + D u(t) & \qquad \text{output equation}
+\end{cases}
+\]
+
+where $F$, $G$, $H$ and $D$ are matrices defined as follows:
+\begin{align*}
+    F = \begin{bmatrix}
+        \\
+        n \times n \\
+        \text{state matrix} \\ \\
+    \end{bmatrix}
+    &
+    \qquad
+    G = \begin{bmatrix}
+        \\
+        \\
+        n \times 1 \\
+        \text{input} \\
+        \text{matrix} \\ \\
+    \end{bmatrix}
+    \\ \\
+    H = \begin{bmatrix}
+        1 \times n \;\;\; \text{output matrix}
+    \end{bmatrix}
+    &
+    \qquad
+    D = \begin{bmatrix}
+        1 \times 1 \;\;\; \text{i/o matrix}
+    \end{bmatrix}
+\end{align*}
+
+In this case we have 1 input and 1 output (i.e. a \emph{SISO} system), but those \emph{difference equations} can be extended to systems with multiple inputs and outputs. Usually $D=0$, since the majority of real systems have this property.
+
+\begin{defn} [Strictly-proper system]
+A system is called \emph{strictly-proper} when the output of the system doesn't directly depend on the input (i.e. $D=0$). 
+\end{defn}
+
+\begin{rem}
+$n$ is the \emph{order} of the system.
+\end{rem}
+
+\begin{exa}[SISO system of order $n=2$]
+    \[
+    \Sc: 
+        \begin{cases}
+            x_1(t+1) = \frac{1}{2} x_1(t) + 2u(t) \\
+            x_2(t+1) = x_1(t) + 2x_2(t) + u(t) \\
+            y(t) = \frac{1}{4}x_1(t) + \frac{1}{2}x_2(t)
+        \end{cases}
+    \]
+    In this case $n=2$, $x(t) = \begin{bmatrix}
+        x_1(t) \\
+        x_2(t)
+    \end{bmatrix}$, one input $u(t)$ and one output $y(t)$.
+
+    \begin{align*}
+        F = \begin{bmatrix}
+            \frac{1}{2} & 0 \\
+            1 & 2
+        \end{bmatrix}
+        & \qquad
+        G = \begin{bmatrix}
+            2 \\ 1
+        \end{bmatrix}
+        \\
+        H = \begin{bmatrix}
+            \frac{1}{4} & \frac{1}{2}
+        \end{bmatrix}
+        & \qquad
+        D = 0
+    \end{align*}
+\end{exa}
+
+
+\begin{rem}[\acrlong{ss} representation is not unique]
+    Let $F_1 = TFT^{-1}$, $G_1 = TG$, $H_1 = HT^{-1}$, $D_1 = D$ for any invertible $(n\times n)$ matrix $T$. Then, the system $\{F, G, H, D\}$ is equivalent to $\{F_1, G_1, H_1, D_1\}$.
+\end{rem}
\ No newline at end of file
diff --git a/lectures/2022_04_07.tex b/lectures/2022_04_07.tex
new file mode 100644
index 0000000..27f630a
--- /dev/null
+++ b/lectures/2022_04_07.tex
@@ -0,0 +1,405 @@
+%!TEX root = ../main.tex
+
+\subsection{Representation \#2: \acrfull{tf}}
+
+\[
+    W(z) = \frac{B(z)}{A(z)} z^{-k} = \frac{b_0 + b_1z^{-1} + b_2z^{-2} + \ldots + b_pz^{-p}}{a_0 + a_1z^{-1} + a_2z^{-2} + \ldots + a_nz^{-n}} z^{-k} 
+\]
+\vspace{1pt}
+\[
+     \Sc: y(t) = W(z)u(t)
+\]     
+
+The \gls{tf} $W(z)$ is a rational function of \emph{z}: it's a \emph{digital filter}.\\
+
+\begin{rem}[$z$ operator]
+    The linear $z$ operator is such that $z^{-1}[x(t)]=x(t-1)$ and $z[x(t)]=x(t+1)$.\\
+    From now on the "$[\cdot]$" will be \textbf{omitted} for simplicity.
+\end{rem}
+
+%Now it's trivial to move from T.F. representation to a time domain description of the system.
+
+\begin{exa}
+    \begin{align*}
+    \Sc: \quad
+        & y(t) = \underbrace{\begin{bmatrix}
+            \frac{1+\frac{1}{2}z^{-1}}{2+\frac{1}{3}z^{-1}+\frac{1}{4}z^{-2}} z^{-1}
+        \end{bmatrix}}_{W(z)} u(t) \\
+        & 2y(t) + \frac{1}{3}y(t-1) + \frac{1}{4}y(t-2) = u(t-1) + \frac{1}{2}u(t-2) \\
+        & y(t) = \underbrace{-\frac{1}{6}y(t-1) - \frac{1}{8}y(t-2)}_\text{recursive part} + \underbrace{\frac{1}{2}u(t-1) + \frac{1}{4}u(t-2)}_\text{past inputs}
+    \end{align*}
+
+\end{exa}
+\begin{rem}[IIR and FIR filters]
+\hfill \break 
+    $\displaystyle W(z) = \frac{z^{-1}}{1 + \frac{1}{3}z^{-1}}$ is an IIR (\emph{Infinite Impulse Response}) filter since it has the recursive part because of the presence of \emph{poles} in $W(z)$.\\
+    $\displaystyle W(z) = z^{-1} + \frac{1}{2}z^{-2} + \frac{1}{4}z^{-3}$ is a FIR (\emph{Finite Impulse Response}) filter since it depends only on a \emph{finite} sequence of past inputs.
+\end{rem}
+
+\begin{rem}[Strictly proper systems]
+    Notice that for strictly proper systems the delay of $W(z)$ is $k \ge 1$, or, equivalently, the order of the numerator of $W(z)$ is strictly smaller than the order of the denominator of $W(z)$.
+    \begin{figure}[H]
+        \begin{minipage}[t]{0.5\textwidth}
+            \centering
+            \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+                \draw[->] (-0.5,0) -- (3,0) node[right] {$t$};
+                \draw[->] (0,-1) -- (0,2) node[left] {$u(t)$};
+                \draw[domain=0:1,smooth,variable=\x,red] plot ({\x},{0});
+                \draw[domain=1:3,smooth,variable=\x,red] plot ({\x},{1.5});
+                \draw[red] (1,0) -- (1,1.5);
+                \draw[mark=*, mark options={fill=blue},blue,samples=5,domain=0:0.8,only marks,variable=\x] plot ({\x},{0});
+                \draw[mark=*, mark options={fill=blue},blue,samples=10,domain=1:3,only marks,variable=\x] plot ({\x},{1.5});
+                \node at (1.1,-0.4) {$t_0$};
+                \draw[green,fill=green] (1,1.5) circle (0.5ex);
+            \end{tikzpicture}
+        \end{minipage}
+        \begin{minipage}[t]{0.5\textwidth}
+            \centering
+            \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+                \draw[->] (-0.5,0) -- (3,0) node[right] {$t$};
+                \draw[->] (0,-1) -- (0,2) node[left] {$y(t)$};
+                \draw[domain=0:1,smooth,variable=\x,red] plot ({\x},{0});
+                \draw[domain=1:3,smooth,variable=\x,red] plot ({\x},{2*(1-e^(-(\x-1)*2))});
+                \draw[mark=*, mark options={fill=blue},blue,samples=5,domain=0:0.8,only marks,variable=\x] plot ({\x},{0});
+                \draw[mark=*, mark options={fill=blue},blue,samples=10,domain=1:3,only marks,variable=\x] plot ({\x},{2*(1-e^(-(\x-1)*2))});
+                \node at (1.1,-0.4) {$t_0$};
+                \draw[green,fill=green] (1,0) circle (0.5ex);
+                \node[align=left] at (3.5,1) {there is no \emph{jump}\\ and at $t_{0}$ is 0};
+                \draw[->] (2,0.8) -- (1.1,0.1);
+            \end{tikzpicture}
+        \end{minipage}
+    \end{figure}
+\end{rem}
+
+\subsection{Representation \#3: Convolution of the input with the \acrfull{ir}}
+The third way to represent a system is through the \emph{convolution} of the input with the \emph{\acrfull{ir}}.\\
+\begin{defn}[\acrlong{ir}]
+    In the discrete time domain, the \acrlong{ir} $\omega(t)$ of a filter $W(z)$ is the output $y(t) = W(z)u(t)$ obtained when the input is the impulse (i.e. $u(t)=0$ everywhere except at $t=0$, where $u(0)=1$).
+    \[
+    \omega(t)=\{\omega(0), \omega(1), \omega(2), \cdots\}
+    \]
+\end{defn}
+
+\begin{figure}[H]
+    \begin{minipage}[t]{0.5\textwidth}
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw[->] (-0.5,0) -- (3,0) node[right] {$t$};
+            \draw[->] (0,-1) -- (0,2) node[left] {$u(t)$};
+            \draw[domain=-0.3:0,smooth,variable=\x,red] plot ({\x},{0});
+            \draw[domain=0:0.2,smooth,variable=\x,red] plot ({\x},{1.5});
+            \draw[domain=0.2:2.5,smooth,variable=\x,red] plot ({\x},{0});
+            \draw[mark=*, mark options={fill=blue},blue,samples=1,domain=0:0.00001,only marks,variable=\x] plot ({\x},{1.5});
+            \draw[mark=*, mark options={fill=blue},blue,samples=5,domain=0.4:2.5,only marks,variable=\x] plot ({\x},{0});
+        \end{tikzpicture}
+        \caption*{Impulse in input}
+    \end{minipage}
+    \begin{minipage}[t]{0.5\textwidth}
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw[->] (-0.5,0) -- (3,0) node[right] {$t$};
+            \draw[->] (0,-1) -- (0,2) node[left] {$y(t)$};
+            \draw[domain=0:2.5,smooth,variable=\x,red] plot ({\x},{2*sin(\x*180/3.14*2)*e^(-\x)});
+            \draw[mark=*, mark options={fill=blue},blue,samples=6,domain=0:2.5,only marks,variable=\x] plot ({\x},{2*sin(\x*180/3.14*2)*e^(-\x)});
+            \node at (0.5,-0.3) {$\omega(0)$};
+            \node at (0.5,1.4) {$\omega(1)$};
+            \node at (1.5,1.0) {$\omega(2)$};
+        \end{tikzpicture}
+        \caption*{\gls{ir} in output}
+    \end{minipage}
+\end{figure}
+
+\textbf{Note} If the system is strictly proper then $\omega(0) = 0$.
+
+It can be proven that the input-output relationship from a general input $u(t)$ to the output $y(t)$ of a system characterized by an \gls{ir} $\omega(t)$ can be written as
+\[ y(t) = \omega(0) u(t) + \omega(1) u(t-1) + \omega(2) u(t-2) + \cdots
+        = \sum_{k=0}^{\infty} \omega(k) u(t-k) \]
+which is the \emph{convolution} of the \gls{ir} with the input signal.
+
+%naming transformations
+\newcommand\nameeq[2]{\text{\qquad #2:}&&#1&&\phantom{\text{#2:}}}
+
+\section{Transformations between representations}
+It is possible to translate each representation into another one. Therefore, there are six possible transformations between the three representations.
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+        \node (n1) [draw, circle, align=center]{\#1\\\acrshort{ss}};
+        \node (n2) [draw, circle, align=center, below of=n1, xshift=-1.8cm] {\#2\\\gls{tf}};
+        \node (n3) [draw, circle, align=center, below of=n1, xshift= 1.8cm] {\#3\\\gls{ir}};
+
+        \draw[->] (n1) edge[bend right] (n2);
+        \draw[->] (n2) edge[bend right] (n3);
+        \draw[->] (n1) edge[bend left] (n3);
+        \draw[->] (n2) edge[bend right=20] (n1);
+        \draw[->] (n3) edge[bend right=20] (n2);
+        \draw[->] (n3) edge[bend left=20] (n1);
+    \end{tikzpicture}
+    \caption*{Transformations between representations}
+\end{figure}
+
+\subsection{\acrlong{ss} to \acrlong{tf}}
+Consider a strictly proper system with the following \gls{ss} representation:
+\[
+\Sc: 
+\begin{cases}
+    x(t+1) = F x(t) + G u(t)\\
+    y(t) = H x(t) + \cancelto{0}{D u(t)}\\
+\end{cases}
+\Rightarrow
+\begin{cases}
+    x(t+1) = F x(t) + G u(t)\\
+    y(t) = H x(t)\\
+\end{cases}
+\]
+From the system we get
+\[ z x(t) = F x(t) + G u(t) \Rightarrow x(t) = (zI - F)^{-1} G u(t) \]
+\[ \Rightarrow y(t) = H x(t) = H (zI - F)^{-1} G \cdot u(t) \]
+Thus, the \acrlong{tf} is
+\begin{flalign}
+    \nameeq{W(z) = H(zI - F) ^ {-1} G}{\gls{ss}\textrightarrow\gls{tf}}\label{t1}
+\end{flalign}
+
+\begin{exa}{Consider the following SISO system of order $n=2$:}
+\begin{align*}
+    F = \begin{bmatrix}
+        1 & 0\\
+        \frac{1}{2} & 2\\
+    \end{bmatrix}
+    &&
+    G = \begin{bmatrix}
+        1\\
+        1\\
+    \end{bmatrix}
+    &&
+    H = \begin{bmatrix}
+        1 & 0\\
+    \end{bmatrix}
+    &&
+    D = 0
+\end{align*}
+
+Using the transformation \ref{t1}:
+\vspace{-10pt}
+
+\begin{align*}
+W(z) &=
+\begin{bmatrix}
+    1 & 0\\
+\end{bmatrix}
+\left( \begin{bmatrix}
+    z & 0\\
+    0 & z\\
+\end{bmatrix}
+-
+\begin{bmatrix}
+    1 & 0 \\
+    \frac{1}{2} & 2\\
+\end{bmatrix}\right)^{-1}
+\begin{bmatrix}
+    1\\
+    1\\
+\end{bmatrix}
+= \begin{bmatrix}
+    1 & 0\\
+\end{bmatrix}
+\begin{bmatrix}
+    z-1 & 0\\
+    -\frac{1}{2} & z-2\\
+\end{bmatrix}^{-1}
+\begin{bmatrix}
+    1\\
+    1\\
+\end{bmatrix}\\
+&= \begin{bmatrix}
+    1 & 0\\
+\end{bmatrix}
+\frac{1}{(z-1)(z-2)}
+\begin{bmatrix}
+    z-2 & 0\\
+    \frac{1}{2} & z-1\\
+\end{bmatrix}
+\begin{bmatrix}
+    1\\
+    1\\
+\end{bmatrix}
+=
+\frac{1}{(z-1)(z-2)}
+\begin{bmatrix}
+    z-2 & 0\\
+\end{bmatrix}
+\begin{bmatrix}
+    1\\
+    1\\
+\end{bmatrix}\\
+&=
+\frac{\cancel{z-2}}{(z-1)\cancel{(z-2)}} = \frac{1}{z-1} = \frac{1}{1-z^{-1}} z^{-1}
+\end{align*}
+Notice that, due to \textbf{cancellation of singularities}, in this case we only have one pole, but the system is of order two; this comes from the fact that part of the system is \emph{non observable}.\\
+Alternatively, it could be noted that $\{F, G, H, D\}$ corresponds to the following system:
+    \[
+    \Sc: 
+        \begin{cases}
+            x_1(t+1) = x_1(t) + u(t) \\
+            x_2(t+1) = \frac{1}{2} x_1(t) + 2x_2(t) + u(t) \\
+            y(t) = x_1(t)
+        \end{cases}
+    \Rightarrow
+        \begin{cases}
+            zx_1(t) = x_1(t) + u(t) \\
+            zx_2(t) = \frac{1}{2} x_1(t) + 2x_2(t) + u(t) \\
+            y(t) = x_1(t)
+        \end{cases}    
+    \]
+From the first equation we have that $x_{1}(t)=\frac{1}{z-1}u(t)$; substituting it into the third one we obtain the same result: $y(t)=x_{1}(t)=\frac{1}{z-1}u(t) \Rightarrow W(z)=\frac{1}{1-z^{-1}} z^{-1}$
+
+\end{exa}
+
+\subsection{\acrlong{tf} to \acrlong{ss}}
+This conversion is rarely used in practice; it is called the \emph{realization} of a \acrlong{tf} into a \acrlong{ss} system.
+
+\textbf{Issue}: the \acrlong{ss} representation is not unique! Thus, from a single \acrlong{tf} we can get infinitely many different, but equivalent, \acrlong{ss} models.
+
+\subsubsection{Control realization}
+
+We assume that the system is \textbf{strictly proper} and that the denominator is \textbf{monic} (i.e. $a_0=1$).
+\[ W(z) = \frac{b_0 z^{n-1} + b_1 z^{n-2} + \dots + b_{n-1}}{z^n + a_1 z^{n-1} + a_2 z^{n-2} + \dots + a_n} \]
+
+The formula for the control realization of $W(z)$ is
+\begin{flalign}
+\nameeq{
+    F = \begin{bmatrix}
+        0 & 1 & 0 & \cdots & 0\\
+        0& 0 & 1 & \ddots & \vdots \\
+        \vdots & \vdots & \ddots & \ddots & 0\\
+        0 & 0 & \cdots & 0 & 1\\
+        -a_n & -a_{n-1} & \multicolumn{2}{c}{\cdots} & -a_1\\
+    \end{bmatrix}
+    &&
+    G = \begin{bmatrix}
+        0\\
+        0\\
+        0\\
+        \vdots\\
+        1\\
+    \end{bmatrix}
+    &&
+    H = \begin{bmatrix}
+        b_{n-1} & b_{n-2} & \cdots & b_0\\
+    \end{bmatrix}
+    &&
+    D = 0
+    }{\gls{tf}\textrightarrow\gls{ss}}\label{t2}
+\end{flalign}
+\begin{exa}
+    Consider the following \acrlong{tf}:
+    \[ W(z) = \frac{2z^2 + \frac{1}{2}z + \frac{1}{4}}{z^3 + \frac{1}{4}z^2 + \frac{1}{3}z + \frac{1}{5}} \]
+    Applying the transformation \ref{t2}, the control realization is:
+    \begin{align*}
+        F = \begin{bmatrix}
+            0 & 1 & 0\\
+            0 & 0 & 1\\
+            -\frac{1}{5} & -\frac{1}{3} & -\frac{1 }{4}\\
+        \end{bmatrix}
+        &&
+        G = \begin{bmatrix}
+            0\\
+            0\\
+            1\\
+        \end{bmatrix}
+        &&
+        H = \begin{bmatrix}
+            \frac{1}{4} & \frac{1}{2} & 2\\
+        \end{bmatrix}
+        &&
+        D = 0
+    \end{align*}
+\end{exa}
+
+\subsection{\acrlong{tf} to \acrlong{ir}}
+To get the \gls{ir} from a \acrlong{tf} $W(z)$ it is sufficient to perform the $\infty$-long division between the numerator and denominator of $W(z)$.
+\begin{flalign}
+\nameeq{\omega(t)= 
+    \begin{cases}
+        e_{t} \qquad \text{if $t \ge 1$}\\
+        0     \qquad \text{ if $t=0$ (and $W(z)$ is strictly-proper)}
+    \end{cases}
+    }{\gls{tf}\textrightarrow\gls{ir}}\label{t3}
+\end{flalign}
+\qquad where $e_{t}$ are the coefficients of 
+\[E(z) = e _{1}z^{-1} + e_{2}z^{-2} + \cdots = \sum_{t=1}^{\infty} e_{t}z^{-t}\]
+\qquad which is the \emph{quotient} (i.e. the result) of the $\infty$-long division.
+
+This can be simply proven remembering the \emph{convolution} of the \gls{ir} of $W(z)$ with the input $u(t)$.
+
+\begin{exa}
+    Consider the following \acrlong{tf}:
+    \[ W(z) = \frac{1}{z-\frac{1}{2}} = \frac{z^{-1}}{1-\frac{1}{2}z^{-1}}
+        \stackrel{\text{$\infty$-long div.}}{=} 0 z^{-0} + 1 z^{-1} + \frac{1}{2}z^{-2} + \frac{1}{4}z^{-3} + \cdots \]
+    Applying the transformation \ref{t3} the \gls{ir} is $\omega(0) = 0$, $\omega(1) = 1$, $\omega(2) = \frac{1}{2}$, $\omega(3) = \frac{1}{4}$, $\dots$
+    Thus $\omega(0)=0$ and $\omega(t) = \frac{1}{2^{t-1}} \quad \forall t \ge 1$
+    %\\and $y(t)$ is the convolution of $u(t)$ with the \gls{ir} $w(t)$, indeed: 
+    %\[
+    %    y(t)=W(z)u(t)=\omega(0)u(0)+\omega(1)u(1)+\cdots = \sum_{k=0}^{\infty}\omega(t)u(t-k)
+    %\]
+
+    In this case there is also a quicker way
+    \[ y(t) = \frac{z^{-1}}{1-\frac{1}{2}z^{-1}} u(t) = \left( z^{-1} \frac{1}{1-\frac{1}{2}z^{-1}} \right) u(t) \]
+    Remembering that for \emph{geometric series} we have \[ \sum_{k = 0}^{\infty} a^k = \frac{1}{1-a} \text{ if } |a| < 1 \]
+    we can rewrite $y(t)$ as follows
+    \[ y(t) = \left( z^{-1} \sum_{k=0}^{\infty} \left( \frac{1}{2} z^{-1} \right)^{k} \right) u(t) = \left( 0 + 1 z^{-1} + \frac{1}{2}z^{-2} + \frac{1}{4}z^{-3} + \cdots \right) u(t) \]
+\end{exa}
+
+\subsection{\acrlong{ir} to \acrlong{tf}}
+\begin{defn}[$\mathcal{Z}$-transform]
+    Given a discrete-time signal $s(t)$ such that $\forall t < 0: s(t) = 0$, its \emph{$\mathcal{Z}$-transform} is defined as
+    \[ \mathcal{Z} \left( s(t) \right) = \sum_{t = 0}^{\infty} s(t) z^{-t} \]
+\end{defn}
+Given this, it can be proven that
+\begin{flalign}
+\nameeq{W(z) = \mathcal{Z}\left( \omega(t) \right) = \sum_{t = 0}^{\infty} \omega(t) z^{-t}}{\gls{ir}\textrightarrow\gls{tf}}\label{t4}
+\end{flalign}
+This means that the \acrlong{tf} of a system is the $\mathcal{Z}$-transform of a special signal, $\omega(t)$, that is the \acrlong{ir} of the system.
+
+\begin{rem}
+    This formula cannot be used in practice to transform an \gls{ir} representation to a \gls{tf} representation.
+    This is because we need infinitely many points of the \acrlong{ir}, and they must be available \emph{noise-free}.
+    Thus, this transformation is only theoretical.
+\end{rem}
+
+\subsection{\acrlong{ss} to \acrlong{ir}}
+Consider the following \acrlong{ss} model, with initial conditions $x(0) = 0$ and $y(0) = 0$
+\[
+    \Sc: 
+    \begin{cases}
+        x(t+1) = F x(t) + G u(t)\\
+        y(t) = H x(t)\\
+    \end{cases}
+\]
+"Running the simulation of the system", we have that
+\begin{align*}
+    x(1) &= \cancelto{0}{F x(0)} + G u(0) = G u(0)\\
+    y(1) &= H x(1) = H G u(0)\\
+         &\Downarrow\\
+    x(2) &= F x(1) + G u(1) = F G u(0) + G u(1)\\
+    y(2) &= H x(2) = H F G u(0) + H G u(1)\\
+         &\Downarrow\\
+    x(3) &= F x(2) + G u(2) = F^2 G u(0) + F G u(1) + G u(2)\\
+    y(3) &= H x(3) = H F^2 G u(0) + H F G u(1) + H G u(2)\\
+         &\vdots
+\end{align*}
+This can be generalized to
+\[ y(t) = 0 u(t) + H G u(t-1) + H F G u(t-2) + H F^2 G u(t-3) + \cdots \]
+Recalling that 
+\[ y(t) = \omega(0) u(t) + \omega(1) u(t-1) + \omega(2) u(t-2) + \omega(3) u(t-3) + \cdots \]
+we deduce that $\omega(0)=0,\, \omega(1)=H G,\, \omega(2)=H F G,\, \omega(3)= H F^2 G,\, \dots $\\
+Thus, the \acrlong{ir} is
+\begin{flalign}
+\nameeq{
+    \omega(t) =
+    \begin{cases}
+        0 \text{           if } t = 0\\
+        H F^{t-1} G \text{ if } t > 0
+    \end{cases}
+}{\gls{ss}\textrightarrow\gls{ir}}\label{t5}
+\end{flalign}
diff --git a/lectures/2022_04_11.tex b/lectures/2022_04_11.tex
new file mode 100644
index 0000000..727f61b
--- /dev/null
+++ b/lectures/2022_04_11.tex
@@ -0,0 +1,659 @@
+%!TEX root = ../main.tex
+\subsection{Summary of transformations}
+Notice that the \gls{ir} representation is very easy to obtain experimentally, since we only need to measure the system response to the impulse signal.
+However, given the \gls{ir} representation, it is difficult to get to the other representations, since the transformation from \gls{ir} to \gls{tf} is only theoretical.
+Moving from the \gls{ir} to the \gls{ss} representation is the key task of the \emph{Subspace-based \acrlong{ss} System Identification}, also known as \emph{4SID method}.
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+        \node (n1) [draw, circle, align=center]{\#1\\\acrshort{ss}};
+        \node (n2) [draw, circle, align=center, below of=n1, xshift=-1.8cm] {\#2\\\gls{tf}};
+        \node (n3) [draw, circle, align=center, below of=n1, xshift= 1.8cm] {\#3\\\gls{ir}};
+        
+        %legend
+        \draw[-stealth, line width=0.4mm] (3,0) -- (5, 0)
+            node[midway, above] {useful and feasible};
+        \draw[-stealth] (3,-0.8) -- (5, -0.8)
+            node[midway, above] {feasible};
+        \draw[-stealth, dashed] (3,-1.6) -- (5, -1.6)
+        node[midway, above] {not feasible};
+
+        \draw[->, line width=0.4mm] (n1) edge[bend right] (n2);
+        \draw[->, line width=0.4mm] (n2) edge[bend right] (n3);
+        \draw[->, line width=0.4mm] (n1) edge[bend left] (n3);
+        \draw[->] (n2) edge[bend right=20] (n1);
+        \draw[->, dashed] (n3) edge[bend right=20] (n2);
+        \draw[->, line width=0.4mm, red] (n3) edge[bend left=20] node {?} (n1);
+    \end{tikzpicture}
+    \caption*{Transformations between representations in practice}
+\end{figure}
+
+
+Before moving to the topic of \emph{4SID method}, a recall of the \emph{observability} and \emph{controllability} properties for a linear dynamic system is needed.
+
+\section{Fundamental concepts of Observability and Controllability}
+
+\[
+    \Sc: 
+    \begin{cases}
+        x(t+1) = Fx(t) + Gu(t) \\
+        y(t) = Hx(t)
+    \end{cases}
+\]
+
+\begin{defn}[Fully Observable]
+The system is fully observable (from the output) if and only if the \textbf{observability matrix} $O$ is full rank:
+\[
+    O = \begin{bmatrix}
+        H \\
+        HF \\
+        \vdots \\
+        HF^{n-1}
+    \end{bmatrix}
+    \qquad
+    \rank (O) = n
+\]
+where $n$ is the order of the system.
+\end{defn}
+
+\begin{rem}[Observability]
+Observability is a property of the system: by observing the output $y(t)$ it's possible to observe the state $x(t)$.\\
+\textbf{Note}: It refers only to state and output (i.e. $F$ and $H$).
+\end{rem}
+
+\begin{defn}[Fully Controllable]
+The system is fully controllable (from the input) if and only if the \textbf{controllability matrix} $R$ is full rank:
+\[
+    R = \begin{bmatrix}
+        G & FG & \cdots & F^{n-1}G
+    \end{bmatrix}
+    \qquad
+    \rank (R) = n
+\]
+where, again, $n$ is the order of the system.\\
+$R$ is also called \emph{reachability} matrix.
+\end{defn}
+
+\begin{rem}[Controllability]
+Controllability is a property of the system: by driving (or moving) the input $u(t)$ it's possible to control the state $x(t)$.\\
+\textbf{Note}: It refers only to input and state (i.e. $F$ and $G$).\\
+\textbf{Note}: Controllability and Reachability are synonyms.
+\end{rem}
+
+\begin{exa}[SISO system of order $n=2$]
+    \begin{align*}
+    \Sc: 
+        \begin{cases}
+            x_1(t+1) = \frac{1}{2} x_1(t) + u(t) \\
+            x_2(t+1) = \frac{1}{3}x_2(t) \\
+            y(t) = \frac{1}{4}x_1(t)
+        \end{cases}
+        \qquad
+        F = \begin{bmatrix}
+            \frac{1}{2} & 0 \\
+            0 & \frac{1}{3}
+        \end{bmatrix}
+        \qquad
+        H = \begin{bmatrix}
+            \frac{1}{4} & 0
+        \end{bmatrix}
+        \qquad
+        G = \begin{bmatrix}
+            1 \\
+            0
+        \end{bmatrix}
+    \end{align*}
+
+    \[
+        O = \begin{bmatrix}
+            H \\
+            HF
+        \end{bmatrix} = \begin{bmatrix}
+            \frac{1}{4} & 0 \\
+            \frac{1}{8} & 0
+        \end{bmatrix}
+        \qquad
+        \rank (O) = 1 < n = 2
+        \quad\implies\quad \text{not fully observable}
+    \]
+
+    \[
+        R = \begin{bmatrix}
+            G & FG
+        \end{bmatrix} = \begin{bmatrix}
+            1 & \frac{1}{2} \\
+            0 & 0
+        \end{bmatrix}
+        \qquad
+        \rank (R) = 1 < n = 2
+        \quad\implies\quad \text{not fully controllable}
+    \]
+    If $\Sc$ becomes 
+    \begin{align*}
+    \mathcal{S'}: 
+        \begin{cases}
+            x_1(t+1) = \frac{1}{2} x_1(t) + u(t) + \color{blue}\frac{1}{6} x_2(t)\\
+            x_2(t+1) = \frac{1}{3}x_2(t) \\
+            y(t) = \frac{1}{4}x_1(t)
+        \end{cases}
+        \qquad
+        \text{only $F$ changes and becomes}\quad
+        F = \begin{bmatrix}
+            \frac{1}{2} & \color{blue}\frac{1}{6} \\
+            0 & \frac{1}{3}
+        \end{bmatrix}
+    \end{align*} 
+    And     
+    \[
+        O = \begin{bmatrix}
+            H \\
+            HF
+        \end{bmatrix} = \begin{bmatrix}
+            \frac{1}{4} & 0 \\
+            \frac{1}{8} & \color{blue}\frac{1}{24}
+        \end{bmatrix}
+        \qquad
+        \rank (O) = 2 = n         \quad\implies\quad \text{fully observable}
+    \]
+    
+    Observability and Controllability can also be checked graphically by building the block scheme 
+    
+      \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+            \node[block, align=center] (z1) {$z^{-1}$};
+            \node[block, align=center, below of=z1] (12) {$\frac{1}{2}$};
+            \node[block, align=center, right=2cm of z1] (14) {$\frac{1}{4}$};
+            \node[sum, align=center, left=2cm of z1] (sum1) {};
+            
+            \node[coordinate] at (0,-2.5cm) (block2) {};
+            \node[block, align=center, below of=block2] (z1_2) {$z^{-1}$};
+            \node[block, align=center, below of=z1_2] (13) {$\frac{1}{3}$};
+            \node[sum, align=center, left=2cm of z1_2] (sum2) {};
+            
+            \node[block, align=center, below right = 2cm and 0.5cm of sum1, blue] (16) {$\frac{1}{6}$};
+            
+            \draw[->] (sum1) -- (z1)
+                node[midway, above] {$x_1(t+1)$};;
+            \draw[->] (z1) -- (14)
+                node[midway,above] (x1t) {$x_1(t)$};
+            \draw[->] (x1t.south) |- (12.east);
+            \draw[->] (12) -| (sum1)
+                node[very near end, right] {$+$};
+            \draw[<-] (sum1) --++ (-2,0)
+                node[near start,above] {$+$}
+                node[very near end, above] (ut) {$u(t)$};
+            \draw[->] (14) --++ (2,0)
+                node[near end,above] (yt) {$y(t)$};
+                
+            \draw[->] (sum2) -- (z1_2)
+                node[midway, above] {$x_2(t+1)$};
+            \draw[->] (13) -| (sum2)
+                node[very near end, right] {$+$};
+            \draw[->] (z1_2) --++ (2,0)
+                node[midway,above] (x2t) {$x_2(t)$};
+            \draw[->] (x2t.south) |- (13.east);    
+
+            \draw[->, blue] (x2t.west)+(0,-0.25cm) |- (16.east);
+            \draw[->, blue] (16.west) -| (sum1.west)
+                node[very near end, left] {$+$};
+                
+            \draw[black, dashed] ([yshift=3mm]14.north)-|(ut.east)|-([yshift=-3mm]13.south)-|(yt.west)|-([yshift=3mm]14.north);
+        \end{tikzpicture}
+    \end{figure}
+    From this block scheme it can be seen that $x_1$ is \emph{directly} observable and reachable; $x_2$ becomes only \emph{indirectly} observable through $x_1(t)$ with the introduction of the blue block that corresponds to the term $\color{blue}\frac{1}{6} x_2(t)$ in $\mathcal{S'}$.
+\end{exa}
+
+\begin{exa}[SISO system of order $n=2$]
+    \begin{align*}
+    \Sc: 
+        \begin{cases}
+            x_1(t+1) = \frac{1}{2} x_1(t)\\
+            x_2(t+1) = \frac{1}{3}x_2(t) + u(t)\\
+            y(t) = \frac{1}{4}x_1(t)
+        \end{cases}
+        \qquad
+        F = \begin{bmatrix}
+            \frac{1}{2} & 0\\
+            0 & \frac{1}{3}
+        \end{bmatrix}
+        \qquad
+        H = \begin{bmatrix}
+            \frac{1}{4} & 0
+        \end{bmatrix}
+        \qquad
+        G = \begin{bmatrix}
+            0 \\
+            1
+        \end{bmatrix}
+    \end{align*}
+
+    \[
+        O = \begin{bmatrix}
+            H \\
+            HF
+        \end{bmatrix} = \begin{bmatrix}
+            \frac{1}{4} & 0 \\
+            \frac{1}{8} & 0
+        \end{bmatrix}
+        \qquad
+        \rank (O) = 1 < n = 2
+        \quad\implies\quad \text{not fully observable}
+    \]
+
+    \[
+        R = \begin{bmatrix}
+            G & FG
+        \end{bmatrix} = \begin{bmatrix}
+            0 & 0 \\
+            1 & \frac{1}{3}
+        \end{bmatrix}
+        \qquad
+        \rank (R) = 1 < n = 2
+        \quad\implies\quad \text{not fully controllable}
+    \]
+    
+    If $\Sc$ becomes 
+    \begin{align*}
+    \mathcal{S'}: 
+        \begin{cases}
+            x_1(t+1) = \frac{1}{2} x_1(t) + \color{blue} \frac{1}{6} x_2(t)\\
+            x_2(t+1) = \frac{1}{3}x_2(t) + u(t)\\
+            y(t) = \frac{1}{4}x_1(t)
+        \end{cases}
+        \qquad
+        \text{only $F$ changes and becomes}\quad
+        F = \begin{bmatrix}
+            \frac{1}{2} & \color{blue} \frac{1}{6} \\
+            0 & \frac{1}{3}
+        \end{bmatrix}
+    \end{align*} 
+    And  
+    
+    \[
+        O = \begin{bmatrix}
+            H \\
+            HF
+        \end{bmatrix} = \begin{bmatrix}
+            \frac{1}{4} & 0 \\
+            \frac{1}{8} & \color{blue} \frac{1}{24}
+        \end{bmatrix}
+        \qquad
+        \rank (O) = 2 = n = 2
+        \quad\implies\quad \text{fully observable}
+    \]
+    
+    \[
+        R = \begin{bmatrix}
+            G & FG
+        \end{bmatrix} = \begin{bmatrix}
+            0 & \color{blue} \frac{1}{6} \\
+            1 & \frac{1}{3}
+        \end{bmatrix}
+        \qquad
+        \rank (R) = 2 = n = 2
+        \quad\implies\quad \text{fully controllable}
+    \]
+    
+    Again, these properties can also be checked using the block scheme
+    
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+            \node[block, align=center] (z1) {$z^{-1}$};
+            \node[block, align=center, below of=z1] (12) {$\frac{1}{2}$};
+            \node[block, align=center, right=2cm of z1] (14) {$\frac{1}{4}$};
+            \node[sum, align=center, left=2cm of z1] (sum1) {};
+            
+            \node[coordinate] at (0,-2.5cm) (block2) {};
+            \node[block, align=center, below of=block2] (z1_2) {$z^{-1}$};
+            \node[block, align=center, below of=z1_2] (13) {$\frac{1}{3}$};
+            \node[sum, align=center, left=2cm of z1_2] (sum2) {};
+            
+            \node[block, align=center, below right = 2cm and 0.5cm of sum1, blue] (16) {$\frac{1}{6}$};
+            
+            \draw[->] (sum1) -- (z1)
+                node[midway, above] {$x_1(t+1)$};;
+            \draw[->] (z1) -- (14)
+                node[midway,above] (x1t) {$x_1(t)$};
+            \draw[->] (x1t.south) |- (12.east);
+            \draw[->] (12) -| (sum1)
+                node[very near end, right] {$+$};
+            \draw[<-] (sum2) --++ (-2,0)
+                node[near start,above] {$+$}
+                node[very near end, above] (ut) {$u(t)$};
+            \draw[->] (14) --++ (2,0)
+                node[near end,above] (yt) {$y(t)$};
+                
+            \draw[->] (sum2) -- (z1_2)
+                node[midway, above] {$x_2(t+1)$};
+            \draw[->] (13) -| (sum2)
+                node[very near end, right] {$+$};
+            \draw[->] (z1_2) --++ (2,0)
+                node[midway,above] (x2t) {$x_2(t)$};
+            \draw[->] (x2t.south) |- (13.east);    
+
+            \draw[->, blue] (x2t.west)+(0,-0.25cm) |- (16.east);
+            \draw[->, blue] (16.west) -| (sum1.west)
+                node[very near end, left] {$+$};
+                
+            \draw[black, dashed] ([yshift=3mm]14.north)-|(ut.east)|-([yshift=-3mm]13.south)-|(yt.west)|-([yshift=3mm]14.north);
+        \end{tikzpicture}
+    \end{figure}
+    
+    It can be noticed that $x_1(t)$ is \emph{directly} observable but not reachable and $x_2(t)$ is \emph{directly} controllable. In $\mathcal{S}'$, with the introduction of the same blue block, the system becomes fully observable and controllable; in particular, $x_2(t)$ becomes also \emph{indirectly} observable and $x_1(t)$ becomes also \emph{indirectly} controllable.
+    
+\end{exa}
+
+\begin{rem}[4 sub-systems]
+\hfill \break
+    Any \acrlong{ss} system can be \textbf{internally} seen as 4 sub-systems as follows:
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+            \node[block, align=center] (noc) {NO\\C};
+            \node[block, align=center, below of=noc] (onc) {O\\NC};
+            \node[block, align=center, below of=onc] (oc) {O\\C};
+            \node[block, align=center, below of=oc] (nonc) {NO\\NC};
+            
+            %legend
+            \node[coordinate] at (5cm, 0cm) (lgnd) {};
+            \node[align=center, right of=lgnd]{O: observable\\C: controllable\\NX: not X};
+
+            \node at (-5cm,-1.5cm) (u) {$u$};
+            \node[coordinate] at (-3cm,-1.5cm) (input) {};
+            \node at (5cm,-2.5cm) (y) {$y$};
+            \node[coordinate] at (3cm,-2.5cm) (output) {};
+            \node[right of=noc] (noc_out) {};
+            \node[right of=nonc] (nonc_out) {};
+            \node[left of=onc] (onc_in) {};
+
+            \draw[-Rays] (noc) -- (noc_out);
+            \draw[-Rays] (nonc) -- (nonc_out);
+            \draw[Rays-] (onc_in) -- (onc);
+            \draw (input) edge[->, bend left=10] (noc);
+            \draw[line width=0.5mm] (input) edge[->, bend right=10] (oc);
+            \draw[line width=0.5mm] (u) edge (input);
+            \draw (onc) edge[bend left=10] (output);
+            \draw[line width=0.5mm] (oc) edge[bend right=10] (output);
+            \draw[line width=0.5mm] (output) edge[->] (y);
+
+            \draw[draw=black] (-4cm,-5.5cm) rectangle ++(8cm,6.5cm);
+        \end{tikzpicture}
+    \end{figure}
+
+    Which \textbf{externally} is equivalent to a system like this:
+    \vspace{-5pt}
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+            \node[block, align=center, blue] (oc) {O\\C};
+            \node[left of=oc] (in) {$u$};
+            \node[right of=oc] (out) {$y$};
+            
+            % \node[align=center, below=0.1cm of oc, blue] {$\uparrow$\\$W(z)$};
+
+            \node[below=0.7cm of oc.north, blue] {$\underbrace{\phantom{OoC}}_{W(z)}$};
+
+            \draw[->] (in) -- (oc);
+            \draw[->] (oc) -- (out);
+        \end{tikzpicture}
+    \end{figure}
+    
+    \vspace{-15pt}
+    
+    Hence, this is a graphic proof of the fact that the I/O representation of a system through the \acrlong{tf} can only represent the \emph{observable} and \emph{controllable} part of the system; all the other sub-systems remain \textbf{hidden} with this representation. For this reason, the \gls{ss} representation is "more complete" than the \gls{tf} one when the system is not fully observable or not fully controllable.
+\end{rem}
+
+Another final definition is needed before presenting the 4SID algorithm.
+
+\begin{defn}[Hankel matrix of order n]
+    Starting from the \gls{ir} $\omega(t) = \{\omega(1), \omega(2), \ldots, \omega(N)\}$ we can build the Hankel matrix of order $n$ as follows:
+
+\[
+    H_n = \begin{bmatrix}
+        \omega(1) & \omega(2) & \omega(3) & \cdots & \omega(n) \\
+        \omega(2) & \omega(3) & \omega(4) & \cdots & \omega(n+1) \\
+        \omega(3) & \omega(4) & \omega(5) & \cdots & \omega(n+2) \\
+        \vdots    & \vdots    & \vdots    & \ddots & \vdots \\
+        \omega(n) & \omega(n+1) & \omega(n+2) & \ldots & \omega(2n-1)
+    \end{bmatrix}
+\]
+
+    \textbf{Note}: it is a square matrix of size $n\times n$.
+
+    \textbf{Note}: we need the IR up to time $2n-1$.
+
+    \textbf{Note}: it starts from $\omega(1)$ and not from $\omega(0)$ (since, for strictly-proper systems, $\omega(0)=0$).
+
+    \textbf{Note}: the anti-diagonals all have the same element repeated.
+    
+    We know that $\omega(t) = \begin{cases}
+    0 &\quad \text{if } t = 0 \\
+    HF^{t-1}G &\quad \text{if } t > 0
+    \end{cases}$ ,\qquad therefore $H_n$ can be rewritten as
+
+    \[
+        H_n = \begin{bmatrix}
+            HG     & HFG    & HF^2G  & \cdots & HF^{n-1}G \\
+            \vdots & \ddots &        &        & \vdots \\
+            \vdots &        & \ddots &        & \vdots \\
+            \vdots &        &        & \ddots & \vdots \\
+            HF^{n-1}G & \cdots & \cdots & \cdots & HF^{2n-2}G
+        \end{bmatrix} = \begin{bmatrix}
+            H \\
+            HF \\
+            \vdots \\
+            HF^{n-1}
+        \end{bmatrix} \cdot \begin{bmatrix}
+            G & FG & \cdots & F^{n-1}G
+        \end{bmatrix} = O \cdot R
+    \]
+    
+    % \[
+    %     \Rightarrow H_n = O \cdot R
+    % \]
+    
+    Therefore, $H_n = O \cdot R$, where $O$ is the observability matrix and $R$ is the reachability matrix.
+\end{defn}
+
+
+
+
+\section{Subspace-based \acrlong{ss} System Identification (4SID)}
+The original 4SID method starts from the measurement of the system output in a very simple experiment, that is the \emph{impulse experiment}.
+\vspace{-7pt}
+\begin{figure}[H]
+    \begin{minipage}[t]{0.5\textwidth}
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw[->] (-0.5,0) -- (3,0) node[right] {$t$};
+            \draw[->] (0,-0.5) -- (0,2) node[left] {$u(t)$};
+            \draw[domain=-0.3:0,smooth,variable=\x,red] plot ({\x},{0});
+            \draw[domain=0:0.2,smooth,variable=\x,red] plot ({\x},{1.5});
+            \draw[domain=0.2:2.5,smooth,variable=\x,red] plot ({\x},{0});
+            \draw[mark=*, mark options={fill=blue},blue,samples=1,domain=0:0.00001,only marks,variable=\x] plot ({\x},{1.5});
+            \draw[mark=*, mark options={fill=blue},blue,samples=10,domain=0.25:2.5,only marks,variable=\x] plot ({\x},{0});
+        \end{tikzpicture}
+        \caption*{Impulse in input}
+    \end{minipage}
+    \begin{minipage}[t]{0.5\textwidth}
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw[->] (-0.5,0) -- (3,0) node[right] {$t$};
+            \draw[->] (0,-0.5) -- (0,2) node[left] {$y(t)$};
+            \draw[domain=0:2.5,smooth,variable=\x,red] plot ({\x},{1.5*sin(\x*180/3.14*5)*e^(-\x)});
+            \draw[mark=*, mark options={fill=blue},blue,samples=15,domain=0:2.5,only marks,variable=\x] plot ({\x},{1.5*sin(\x*180/3.14*5)*e^(-\x)});
+        \end{tikzpicture}
+        \caption*{\gls{ir} in output}
+    \end{minipage}
+    \vspace{-15pt}
+    %\caption*{Impulse experiment}
+\end{figure}
+
+The fundamental idea of this experiment is that it is very simple to measure the output of a system given an impulse signal as the input.
+Since the goal of the method is to identify an \gls{ss} model $\left\{ \hat{F}, \hat{G}, \hat{H} \right\}$ starting from the \gls{ir} $\omega(t) = \left\{ \omega(0), \omega(1), \omega(2), \cdots \right\}$, it can be said that it is a \acrlong{bb} system identification method (we need only data and knowledge of the system isn't required).
+
+We will see the solution of the problem with two different assumptions:
+\begin{description}
+    \item [Basic problem] \gls{ir} measurement is assumed to be \textbf{noise-free}. Easier and not realistic problem. Described in \ref{sec:4SID-NF}.
+    \item [Real problem] \gls{ir} is measured with noise, that is:
+    \[ \widetilde{\omega}(t) = \omega(t) + \eta(t) \quad t = 0, 1,\dots, N \] where
+        $\widetilde{\omega}(t)$ is the measured noisy \gls{ir},
+        $\omega(t)$ is the ``true'' noise-free \gls{ir} and
+        $\eta(t)$ is the measurement noise (e.g. \gls{wn}). Described in \ref{sec:4SID-N}.
+\end{description}
+
+\begin{rem}
+    We will see in detail only the original version of 4SID, i.e. the one in which the experiment is an impulse experiment.
+    However 4SID can be extended to any generic input signal $\left\{ u(1), u(2), \cdots, u(N) \right\}$ that is sufficiently exciting.
+\end{rem}
+
+\begin{rem}[Unstable System]
+    In case of an unstable system the measurements must be collected in a closed-loop experiment.
+    Indeed, if the experiment was open-loop, the experiment would be unfeasible.
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \node [block,align=center] (stab) {stability\\controller};
+            \node [sum] (sum) [right of=stab, node distance=2cm] {};
+            \node [block] (sys) [right of=sum, node distance=2cm]{system};
+            \node [coordinate] (split) [right of=sys, node distance=1cm]{};
+            \node [coordinate] (end) [right of=split, node distance=1cm]{};
+            \node (in) [above of=sum, node distance=1.5cm] {};
+            \node [coordinate] (mid) [below of=sum, node distance=1cm] {};
+
+            \draw[->] (in) edge node[pos=0.2, align=left] {experimental\\excitation input} (sum);
+            \draw[->] (stab) edge node {} (sum);
+            \draw[->] (sum) edge node {} (sys);
+            \draw (sys) edge (split);
+            \draw[->] (split) -- (end);
+            \draw (split) |- (mid);
+            \draw[->] (mid) -| (stab);
+        \end{tikzpicture}
+        %\vspace{3pt}
+        %\caption*{Closed Loop System}
+    \end{figure}
+\end{rem}
+
+\section{4SID procedure (noise-free)} \label{sec:4SID-NF}
+
+\paragraph{Step 1} \label{4SID-NF:step1} Build the Hankel matrix in increasing order and each time compute the rank of the matrix.
+
+\[
+    H_1 = \begin{bmatrix}
+        \omega(1)
+    \end{bmatrix}
+    \qquad
+    H_2 = \begin{bmatrix}
+        \omega(1) & \omega(2) \\
+        \omega(2) & \omega(3)
+    \end{bmatrix}
+    \qquad
+    H_3 = \ldots
+    \qquad
+    \cdots
+    \qquad
+    H_n = \ldots
+\]
+
+Suppose that $\rank (H_i) = i \quad \forall i \in \{1, \dots, n\}$ \quad and $\rank (H_{n+1}) = n$.\\
+If this happens, it means that we found the first Hankel matrix which is not full rank and it also means that we have estimated (found) the order of the system.
+
+\paragraph{Step 2} Take $H_{n+1}$ (\textbf{recall}: $\rank (H_{n+1}) = n$) and factorize it in two rectangular matrices of size $(n+1) \times n$ and $n \times (n+1)$.
+
+\[
+    H_{n+1} = \begin{bmatrix}
+        \text{extended} \\
+        \text{observability} \\
+        \text{matrix:} \\
+        O_{n+1}
+    \end{bmatrix} \cdot \begin{bmatrix}
+        \text{extended} \\
+        \text{reachability} \\
+        \text{matrix:} \\
+        R_{n+1}
+    \end{bmatrix}
+\]
+
+where $O_{n+1} = \begin{bmatrix}
+    \color{blue}H \\ HF \\ \vdots \\ HF^n
+\end{bmatrix}$ of size $(n+1)\times n$ and $R_{n+1} = \begin{bmatrix}
+    \color{red}G & FG & \cdots & F^nG
+\end{bmatrix}$ of size $n\times (n+1)$.
+
+\paragraph{Step 3} \label{4SID-NF:step3} $H$, $F$, $G$ estimation.
+
+Using $O_{n+1}$ and $R_{n+1}$ we can easily find:
+\begin{align*}
+    \hat{G} =  \color{red} R_{n+1}(\texttt{:;1}) & \quad\text{(first column of $R_{n+1}$)} \\
+    \hat{H} =  \color{blue} O_{n+1}(\texttt{1;:}) & \quad\text{(first row of $O_{n+1}$)} \\
+\end{align*}
+
+To estimate $\hat{F}$ consider $O_{n+1}$ (or, similarly, $R_{n+1}$):\\
+define $O'$ as $O_{n+1}$ without the last row, and $O''$ as $O_{n+1}$ without the first row, that is
+\[O' = O_{n+1}\texttt{(1:n;:)} = \begin{bmatrix}
+        H \\
+        HF \\
+        \vdots \\
+        HF^{n-1}
+    \end{bmatrix}\]
+
+\[ O'' = O_{n+1}\texttt{(2:n+1;:)} = \begin{bmatrix}
+        HF \\
+        HF^2 \\
+        \vdots \\
+        HF^{n}
+    \end{bmatrix}\]
+
+\textbf{Note} $O'$ and $O''$ are $n\times n$ matrices.
+
+\textbf{Note} It is easy to verify that $O'$ and $O''$ follow the \emph{shift-invariance} property: $O''= O' F$
+
+
+Thanks to the latter property, if we assume that $O'$ is invertible, we can find $\hat{F} = (O')^{-1}O''$.
+
+\paragraph{Conclusions}
+In conclusion, in a simple and \emph{constructive} (i.e. NON parametric) way we have estimated a \acrlong{ss} model of the system starting from measured \emph{noise-free} \gls{ir}, using only $2n+1$ samples of \gls{ir}. \\
+In practice $n \ll N$, where, again, $n$ is the order of the system and $N$ the number of samples in the dataset.
+
+\[ \left\{\hat{H}, \hat{G}, \hat{F}\right\} = \left\{O_{n+1}(\texttt{1;:}), R_{n+1}(\texttt{:;1}), (O')^{-1}O'' \right\}\]
+
+\begin{rem}
+    If the measurement is noisy all this process is \textbf{useless}. Also, if $n$ is unknown, \nameref{4SID-NF:step1} could never stop.
+\end{rem}
+
+\clearpage
+
+\section{4SID procedure (with Noise)} \label{sec:4SID-N}
+
+In reality, the measurements of \gls{ir} are noisy: $\tilde{\omega}(t) = \omega(t) + \eta(t)$. Therefore, the \textbf{real problem} is the estimation of the system taking into account the noise; indeed, the available dataset will be $\tilde{\omega}(t) = \{\tilde{\omega}(0), \tilde{\omega}(1), \cdots, \tilde{\omega}(N)\}$.  
+
+\begin{figure}[H]
+    \begin{minipage}[t]{0.5\textwidth}
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw[->] (-0.5,0) -- (3,0) node[right] {$t$};
+            \draw[->] (0,-0.5) -- (0,1.7) node[left] {$u(t)$};
+            \draw[domain=-0.3:0,smooth,variable=\x,red] plot ({\x},{0});
+            \draw[domain=0:0.2,smooth,variable=\x,red] plot ({\x},{1.5});
+            \draw[domain=0.2:2.5,smooth,variable=\x,red] plot ({\x},{0});
+        \end{tikzpicture}
+        \caption*{Impulse in input}
+    \end{minipage}
+    \begin{minipage}[t]{0.5\textwidth}
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw[->] (-0.5,0) -- (3,0) node[right] {$t$};
+            \draw[->] (0,-0.5) -- (0,1.7) node[left] {$y(t)$};
+            \draw[domain=0:2.5,smooth,variable=\x,red] plot ({\x},{cos(\x*180/3.14*5)*e^(-\x)});
+            \draw[samples=100, domain=0:2.5,smooth,variable=\x,blue] plot ({\x},{cos(\x*180/3.14*5)*e^(-\x)+rand/4});
+        \end{tikzpicture}
+        \caption*{\gls{ir}}
+    \end{minipage}
+\end{figure}
+
+
diff --git a/lectures/2022_04_12.tex b/lectures/2022_04_12.tex
new file mode 100644
index 0000000..519fe7d
--- /dev/null
+++ b/lectures/2022_04_12.tex
@@ -0,0 +1,341 @@
+%!TEX root = ../main.tex
+
+\paragraph{Step 1} Build the Hankel matrix from data using ``one-shot'' all the available $N$ data points.
+
+\[
+    \tilde{H}_{qd} = \begin{bmatrix}
+        \tilde{\omega}(1) & \tilde{\omega}(2) & \cdots & \tilde{\omega}(d) \\
+        \tilde{\omega}(2) & \tilde{\omega}(3) & \cdots & \tilde{\omega}(d+1) \\
+        \vdots            & \vdots            & \ddots & \vdots \\
+        \tilde{\omega}(q) & \tilde{\omega}(q+1) & \cdots & \tilde{\omega}(q+d-1) \\
+    \end{bmatrix}
+\]
+
+$\tilde{H}_{qd}$ is a $q\times d$ matrix.\\
+\textbf{Note} As before, we start from $\tilde{\omega}(1)$ since we are assuming $\tilde{\omega}(0)=0$. \\
+\textbf{Note} $q+d-1$ must be equal to $N$ so that we use the whole dataset.
+
+
+\begin{rem}[Choice of $q$ and $d$]
+    Hypothesis: $q<d$ so that $q+d-1=N \Rightarrow q=N+1-d$.
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw[->] (-0.5,0) -- (3,0) node[right] {$d$};
+            \draw[->] (0,-0.5) -- (0,3) node[left] {$q$};
+            \draw[domain=0.3:2.7,smooth,variable=\x,black] plot ({\x},{3-\x});
+            \draw[line width=0.6mm,domain=1.5:1.8,smooth,variable=\x,green] plot ({\x},{3-\x});
+            \draw[line width=0.6mm,domain=2.3:2.7,smooth,variable=\x,blue] plot ({\x},{3-\x});
+            \draw[dotted] (0,0) -- (1.5,1.5);
+
+            \node at (2.3,1.5) {$q \approx d$};
+            \node at (3.1,0.7) {$q \ll d$};
+
+            \node[below] at (0.3,0) {$1$};
+            \node[below] at (2.7,0) {$N$};
+            \node[left] at (0,0.3) {$1$};
+            \node[left] at (0,2.7) {$N$};
+        \end{tikzpicture}
+    \end{figure}
+
+    If $q \approx d$ the method has better accuracy but high computational effort.
+    
+    If $q \ll d$ it's computationally less intensive but has worse accuracy.
+
+    \textbf{Rule of thumb} If $ q > \frac{d}{2}$ we get to a good enough result.
+\end{rem}
+
+\paragraph{Step 2} \emph{Singular Value Decomposition (SVD)} of $\tilde{H}_{qd}$
+
+\[
+    \underbrace{\tilde{H}_{qd}}_{q\times d} = \underbrace{\tilde{U}}_{q\times q} \underbrace{\tilde{S}}_{q\times d} \underbrace{\tilde{V}\transpose}_{d\times d}
+\]
+
+where $\tilde{U}$ and $\tilde{V}$ are \emph{square} and \emph{unitary} matrices and $\tilde{S}$ is a \emph{rectangular diagonal} matrix such that:
+
+\[
+    \tilde{S} = \begin{bmatrix}
+        \sigma_1 & & & & &\\
+        & \sigma_2 & & & &\\
+        & & \ddots & & &\\
+        & & & \sigma_q & &\\
+    \end{bmatrix}
+\]
+
+where $\sigma_1$, $\sigma_2$, $\ldots$, $\sigma_q$ are the \emph{singular values} of $\tilde{H}_{qd}$.
+Those are real, positive numbers, sorted in decreasing order (i.e. $\sigma_1 \ge \sigma_2 \ge \cdots \ge \sigma_q$).
+
+\begin{defn}[Unitary matrix]
+    A matrix $M$ is \emph{unitary} if:
+    \begin{itemize}
+        \item $\left|\det (M)\right| = 1$ \quad (this implies that $M$ is invertible)
+        \item $M^{-1} = M\transpose$
+    \end{itemize}
+\end{defn}
+
+\begin{rem}
+    The singular values of a rectangular matrix are a \emph{sort of eigenvalues} of a square matrix.\\
+    SVD is a \emph{sort of diagonalization} of a rectangular matrix.
+\end{rem}
+
+\begin{rem}[Eigenvalues and Singular Values]
+    For a square matrix $A$, $\text{eig}(A) = \text{roots}(\det(A-\lambda I))$. \\
+    If a matrix $M$ is rectangular, $SV(M) = \sqrt{\text{eig}(MM\transpose)}$ (for non zero eigenvalues).
+\end{rem}
+
+\begin{rem}[How to compute SVD]
+    The optimal numerical computation is not trivial. 
+
+    Theoretical method for SVD computation is to make 2 diagonalization steps:
+    \[
+        \underbrace{\tilde{H}_{qd} \tilde{H}_{qd}\transpose}_{q\times q} = \tilde{U}\tilde{S}\tilde{S}\transpose\tilde{U}\transpose
+    \]
+    \[
+        \underbrace{\tilde{H}_{qd}\transpose \tilde{H}_{qd}}_{d\times d} = \tilde{V}\tilde{S}\transpose\tilde{S}\tilde{V}\transpose
+    \]
+    
+    Instead, use \texttt{svd(M)} in Matlab.
+\end{rem}
+
+\paragraph{Step 3} Plot the singular values and separate (cut-off) the system from noise.
+
+\begin{figure}[H]
+    \begin{minipage}[t]{0.5\textwidth}
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw[->] (0,0) -- (5,0) node[right] {$i$};
+            \draw[->] (0,0) -- (0,2) node[left] {$\sigma_i$};
+            \node at (0.5,1.8) {\textbullet};
+            \node at (1.0,1.3) {\textbullet};
+            \node at (1.5,1.0) {\textbullet};
+            \node at (2.0,0.9) {\textbullet};
+            \node at (2.5,0.8) {\textbullet};
+            \node at (3.0,0.2) {\textbullet};
+            \node at (3.5,0.2) {\textbullet};
+            \node at (4.0,0.2) {\textbullet};
+            \node at (4.5,0.2) {\textbullet};
+            \draw (2.5,-0.1) -- (2.5,0.1);
+            \node at (2.5,-0.4) {$n$};
+
+            \draw[decoration={brace}, decorate] (0.5,2) node {} -- (2.5,1);
+            \node at (2.5,1.8) {signal SV};
+
+            \draw[decoration={brace}, decorate] (3,0.4) node {} -- (4.5,0.4);
+            \node at (3.75,0.8) {noise SV};
+        \end{tikzpicture}
+        \caption*{Ideal case}
+    \end{minipage}
+    \begin{minipage}[t]{0.5\textwidth}
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw[->] (0,0) -- (5,0) node[right] {$i$};
+            \draw[->] (0,0) -- (0,2) node[left] {$\sigma_i$};
+            \node at (0.5,1.8) {\textbullet};
+            \node at (1.0,1.3) {\textbullet};
+            \node at (1.5,1.0) {\textbullet};
+            \node at (2.0,0.8) {\textbullet};
+            \node at (2.5,0.6) {\textbullet};
+            \node at (3.0,0.4) {\textbullet};
+            \node at (3.5,0.3) {\textbullet};
+            \node at (4.0,0.2) {\textbullet};
+            \node at (4.5,0.2) {\textbullet};
+            \draw (2,-0.1) rectangle ++(1,0.2);
+            \node at (2.5,-0.4) {$n$};
+
+            \draw[decoration={brace}, decorate] (2,1) node {} -- (3,1);
+            \node at (2.5,1.4) {transition};
+        \end{tikzpicture}
+        \caption*{Real case}
+    \end{minipage}
+\end{figure}
+
+In the ideal case there is a perfect/clear separation between the signal and the noise singular values (a jump).
+The index of the jump is $n$, that is the order of the system.
+
+In the real case there is no clear distinction between signal and noise singular values, since the order of the system can assume values in an interval.
+With some empirical test we can select a good compromise between complexity, precision and overfitting (see \emph{cross-validation}).
+
+After the decision on the value of $n$ we split $\tilde{H}_{qd}$ in $\tilde{U}$, $\tilde{S}$ and $\tilde{V}\transpose$:
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}
+        \tikzset{BarreStyle/.style = {opacity=.3,line width=4 mm,color=blue}}
+        \node (H) {$\tilde{H}_{qd} =$};
+        \matrix (U) [right of=H, node distance=2cm,matrix of math nodes, nodes in empty cells,left delimiter={[},right delimiter={]},text depth=0ex,text height=1ex,text width=1ex]
+        {
+            & & & & \\
+            & & & & \\
+            \hat{U} & & \tilde{U} & & \\
+            & & & & \\
+            & & & & \\
+        };
+        \matrix (S) [right of=U,node distance=3cm,matrix of math nodes, nodes in empty cells,left delimiter={[},right delimiter={]},text depth=0ex,text height=1ex,text width=1ex]
+        {
+            \hat{S} & & & & \\
+            & & & & \\
+            & & \tilde{S} & & \\
+            & & & & \\
+            & & & & \\
+        };
+        \matrix (V) [right of=S,node distance=3cm,matrix of math nodes, nodes in empty cells,left delimiter={[},right delimiter={]},text depth=0ex,text height=1ex,text width=1ex]
+        {
+            & & \hat{V}\transpose & & \\
+            & & & & \\
+            & & \tilde{V}\transpose & & \\
+            & & & & \\
+            & & & & \\
+        };
+
+        \draw[decoration={brace}, decorate] (1,1.2) node {} -- (1.5,1.2);
+        \node at (1.25,1.5) {$q\times n$};
+        \draw[decoration={brace}, decorate] (4,1.2) node {} -- (4.5,1.2);
+        \node at (4.25,1.5) {$n\times n$};
+        \draw[decoration={brace}, decorate] (7,1.2) node {} -- (9,1.2);
+        \node at (8,1.5) {$n\times d$};
+
+        \draw [BarreStyle] (U-1-1.north) to (U-5-1.south);
+        \draw [BarreStyle] (S-1-1.west) to (S-1-1.east);
+        \draw [BarreStyle] (V-1-1.west) to (V-1-5.east);
+    \end{tikzpicture}
+\end{figure}
+
+
+\[
+    \tilde{H}_{qd} = \underbrace{\hat{U} \hat{S} \hat{V}\transpose}_{\hat{H}_{qd}} + H_{res,qd} \qquad \rank (\tilde{H}_{qd}) = q \quad \rank (\hat{H}_{qd}) = n \quad \rank (H_{res,qd}) = q - n
+\]
+
+where $\hat{H}_{qd}$ is the \emph{signal part} and $H_{res,qd}$ is the \emph{noise (or residual) part} of $\tilde{H}_{qd}$.\\
+
+\textbf{Note} $\hat{S}$ is now a square ($n\times n$) diagonal matrix of $\sigma_1, \dots, \sigma_n$.\\
+\textbf{Note} From $\tilde{H}_{qd}$ to $\hat{H}_{qd}$ the rank is hugely reduced.\\
+\textbf{Note} $\hat{H}_{qd}$ is the ``cleaned'' Hankel matrix.
+
+\paragraph{Step 4} Estimation of $\hat{F}$, $\hat{G}$ and $\hat{H}$ using the cleaned matrix $\hat{H}_{qd}$
+
+\[
+    \hat{H}_{qd} = \hat{U} \hat{S} \hat{V}\transpose = \hat{U} \hat{S}^{\frac{1}{2}} \hat{S}^{\frac{1}{2}} \hat{V}\transpose
+\]
+
+where $\hat{S}^{\frac{1}{2}}$ is the square diagonal matrix with elements the square roots of the elements of $\hat{S}$, that is $\hat{S}^{\frac{1}{2}}=diag(\sqrt{\sigma_1}, \dots, \sqrt{\sigma_n})$. \\
+
+Defining $\hat{O} = \hat{U}\hat{S}^{\frac{1}{2}}$ and $\hat{R} = \hat{S}^{\frac{1}{2}} \hat{V}\transpose$, $\hat{H}_{qd}$ becomes 
+\[\hat{H}_{qd} = \underbrace{\hat{O}}_{q\times n} \underbrace{\hat{R}}_{n\times d}\]
+
+
+We can view $\hat{O}$ as the \emph{extended observability matrix} and $\hat{R}$ as the \emph{extended reachability matrix} of the system. \\
+
+Now it is possible to estimate $\hat{H}$ with the first row of $\hat{O}$ and $\hat{G}$ with the first column of $\hat{R}$, similarly to the \nameref{4SID-NF:step3} of the noise-free case.
+
+\begin{align*}
+    \hat{G} = \hat{R}(\texttt{:;1}) & \quad\text{(first column of $\hat{R}$)} \\
+    \hat{H} = \hat{O}(\texttt{1;:}) & \quad\text{(first row of $\hat{O}$)} \\
+\end{align*}
+
+
+
+What about the estimation of $\hat{F}$? Let's try to proceed again similarly to what we have done at the \nameref{4SID-NF:step3} of the noise-free case.
+Consider for example $\hat{O}$ and define $\hat{O'}$ as $\hat{O}$ without the last row, and $\hat{O''}$ as $\hat{O}$ without the first row.
+
+Using the \emph{shift-invariance} property we have that $\hat{O'} \hat{F} = \hat{O''}$, but $\hat{O'}$ is not a square matrix so it's NOT invertible.
+To avoid this problem we can use the approximate \emph{least-squares} solution.
+
+
+\begin{rem}[Solution of Linear Systems]
+    Consider a generic system $Ax = B$ with dimension  $(h\times n) \cdot (n \times 1) = (h \times 1)$. We have 3 different cases:
+    \begin{enumerate}
+        \item $h < n$. We have less equations than variables: the system is \emph{under determined} and we have infinite solutions.
+        \item $h = n$. We have one and only one solution if $A$ is invertible.
+        \item $h > n$. We have more equations than variables: the system is \emph{over determined} and it's impossible (no solutions).
+    \end{enumerate}
+\end{rem}
+
+\begin{rem}[Least-Squares Method]
+    In case we have more equations than variables (i.e. $h>n$) we can use an approximate solution using the least-squares method, which for a generic system is as follows:
+    \begin{align*}
+        Ax &= B \\
+           &\Downarrow\\
+        A\transpose A x &= A\transpose B \implies \hat{x} = \underbrace{(A\transpose A)^{-1}A\transpose}_{A^+} B
+    \end{align*}
+    
+    $A^+$ is called \emph{pseudo-inverse}, which is a ``surrogate inverse'' when $A$ is rectangular.
+\end{rem}
+
+
+Applying the least-squares method to $\hat{O'} \hat{F} = \hat{O''}$ we have
+\begin{align*}
+    \hat{O'}\hat{F} = \hat{O''} \quad \Rightarrow \quad
+    (\hat{O'})\transpose\hat{O'}\hat{F} = (\hat{O'})\transpose\hat{O''} \quad \Rightarrow \quad 
+    \hat{F} = \left((\hat{O'})\transpose\hat{O'}\right)^{-1} (\hat{O'})\transpose\hat{O''}
+\end{align*}
+
+
+\paragraph{Conclusions} Starting from a noisy \gls{ir} $\widetilde{\omega}(t) = \{\widetilde{\omega}(1), \widetilde{\omega}(2), \ldots, \widetilde{\omega}(N)\}$ we have estimated a model $\{\hat{F}, \hat{G}, \hat{H}\}$ in a non-parametric and constructive way.
+\[ \left\{\hat{H}, \hat{G}, \hat{F} \right\} = \left \{\hat{O}(\texttt{1;:}), \hat{R}(\texttt{:;1}), \left((\hat{O'})\transpose\hat{O'}\right)^{-1} (\hat{O'})\transpose\hat{O''} \right \}\]
+
+\begin{rem}
+    This method can be extended also to the case where the input signal is generic (i.e. not an impulse).
+\end{rem}
+
+\begin{rem}[Optimality of 4SID]
+    The method is \emph{optimal} in the sense that it makes the best possible rank reduction of $\tilde{H}_{qd}$, that is from $q$ to $n$.
+    
+    However, notice that in general there are infinite ways to make a rank reduction.
+\end{rem}
+
+\begin{rem}[Rank Reduction]
+    Our goal is to obtain the desired rank reduction by discarding the minimum amount of information contained in the original matrix.
+    SVD makes exactly this: $H_{res,qd}$ has the minimum possible size in the sense of the \emph{Frobenius norm}:
+    \[
+        \left\Vert H_{res,qd} \right\Vert_\text{F} = \sqrt{\sum_{ij} \left(H_{res,qd}^{(ij)} \right)^2}
+    \]   
+\end{rem}
+
+\begin{exa}[Rank Reduction]
+    
+    If we reduce a matrix $M$ as follows: 
+    \[
+        \underbrace{\begin{bmatrix}
+            2 & 5 & 3 & 6 & 5 \\
+            5 & 3 & 6 & 5 & 7 \\
+            3 & 6 & 5 & 7 & 1
+        \end{bmatrix}}_{M}
+        =
+        \underbrace{\begin{bmatrix}
+            1 & 0 & 0 & 0 & 0 \\
+            0 & 1 & 0 & 0 & 0 \\
+            0 & 0 & 0 & 0 & 0
+        \end{bmatrix}}_{\hat{M}}
+        +
+        \underbrace{
+        \begin{bmatrix}
+            1 & 5 & 3 & 6 & 5 \\
+            5 & 2 & 6 & 5 & 7 \\
+            3 & 6 & 5 & 7 & 1
+        \end{bmatrix}}_{M_{res}}
+    \]
+
+    where
+    \[ \rank(M) = 3 \qquad \rank(\hat{M}) = 2 \qquad \rank(M_{res}) = 3 \]
+
+    this is not the optimal rank reduction, since it factors out the matrix $\hat{M}$ with lower rank but a lot of information of the original matrix $M$ is lost in the residual matrix $M_{res}$.
+\end{exa}
+
+
+\begin{rem}
+    4SID is a constructive method that can be implemented in a fully-automatic way, except for these steps:
+    \begin{itemize}
+        \item $q$ and $d$ selection (not critical)
+        \item choice of $n$ (typically supervised by the designer). It can be made automatic using a cross-validation method.
+    \end{itemize}
+\end{rem}
+
+\begin{rem}
+    SVD was an historical turning point in machine learning algorithms because it allows:
+    \begin{itemize}
+        \item very efficient compression of information.
+        \item very efficient separation of \emph{important} information from noise.
+        \item order reduction of a model.
+    \end{itemize}
+    Notice that those are 3 different perspectives on the same general problem.
+\end{rem}
diff --git a/lectures/2022_04_14.tex b/lectures/2022_04_14.tex
new file mode 100644
index 0000000..9f4f0ee
--- /dev/null
+++ b/lectures/2022_04_14.tex
@@ -0,0 +1,468 @@
+%!TEX root = ../main.tex
+
+\begin{exa}[similar to an exam exercise]
+    Consider the following \gls{ss} model
+    \[
+    F = \begin{bmatrix}
+        \frac{1}{2} & 0 \\
+        1   & \frac{1}{4}
+    \end{bmatrix}
+    \qquad
+    G = \begin{bmatrix}
+        1 \\ 0
+    \end{bmatrix}
+    \qquad
+    H = \begin{bmatrix}
+        0 & 1
+    \end{bmatrix}
+    \qquad
+    D = 0
+    \]
+    \textbf{Note} Given the size of $F$, the system has order $n=2$ and it is a strictly proper single-input single-output system. \\
+    \textbf{Note} Since $F$ is triangular, the eigenvalues are on the diagonal: $\eig (F) = \{\frac{1}{2}, \frac{1}{4}\}$. Both have absolute value less than one, thus the system is \emph{asymptotically stable}.
+
+    \paragraph{Question 1} Write the time domain equations of the system in the state space representation.
+    \[
+        \Sc:
+        \begin{cases}
+            x_1(t+1) &= \frac{1}{2}x_1(t) + u(t) \\
+            x_2(t+1) &= x_1(t) + \frac{1}{4}x_2(t) \\
+            y(t) &= x_2(t)
+        \end{cases}
+    \]
+
+    \paragraph{Question 2} Write the block scheme of the \gls{ss} representation of the system.
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+            \node [block] (in1) {$1$};
+            \node [left of=in1] (u) {$u(t)$};
+            \node [sum, right of=in1, node distance=1.5cm] (sum1) {};
+            \node [block, right of=sum1, node distance=2.5cm] (n1) {$z^{-1}$};
+            \node [block, below of=n1, node distance=1.5cm] (fb1) {$\frac{1}{2}$};
+            \node [coordinate, right of=n1] (exit1) {};
+            \node [block, below of=fb1, node distance=1.5cm] (in2) {$1$};
+            \node [block, below of=in2, node distance=1.5cm] (n2) {$z^{-1}$};
+            \node [sum, left of=n2, node distance=2.5cm] (sum2) {};
+            \node [coordinate, right of=n2] (exit2) {};
+            \node [block, below of=n2, node distance=1.5cm] (fb2) {$\frac{1}{4}$};
+            \node [block, right of=exit2, node distance=1.5cm] (out2) {$1$};
+            \node [right of=out2, node distance=1.5cm] (y) {$y(t)$};
+
+            \draw[->] (u) -- (in1);
+            \draw[->] (in1) -- (sum1);
+            \draw[->] (sum1) edge node {$x_1(t+1)$} (n1);
+            \draw (n1) edge node {$x_1(t)$} (exit1);
+            \draw[->] (exit1) |- (fb1);
+            \draw[->] (exit1) |- (in2);
+            \draw[->] (fb1) -| (sum1);
+            \draw[->] (in2) -| (sum2);
+            \draw[->] (sum2) edge node {$x_2(t+1)$} (n2);
+            \draw (n2) edge node {$x_2(t)$} (exit2);
+            \draw[->] (exit2) |- (fb2);
+            \draw[->] (fb2) -| (sum2);
+            \draw[->] (exit2) -- (out2);
+            \draw[->] (out2) -- (y);
+        \end{tikzpicture}
+    \end{figure}
+
+
+    By visual inspection it \emph{seems} that the system is fully observable and fully controllable.
+
+    \paragraph{Question 3} Make a formal verification that the system is fully observable and controllable.
+    \[
+        O = \begin{bmatrix}
+            H \\ HF
+        \end{bmatrix} = \begin{bmatrix}
+            0 & 1 \\
+            1 & \frac{1}{4}
+        \end{bmatrix}
+        \qquad
+        \rank (O) = 2 = n
+    \]
+    \[
+        R = \begin{bmatrix}
+            G & FG
+        \end{bmatrix} = \begin{bmatrix}
+            1 & \frac{1}{2} \\
+            0 & 1
+        \end{bmatrix}
+        \qquad
+        \rank (R) = 2 = n
+    \]
+    This confirms our visual inspection.
+    
+    Let's compute the extended ($n+1 = 2+1$) $O_3$ and $R_3$:
+    \[
+        F^{2} = F F = 
+        \begin{bmatrix}
+            \frac{1}{2} & 0 \\
+            1 & \frac{1}{4} 
+        \end{bmatrix} 
+        \begin{bmatrix}
+            \frac{1}{2} & 0 \\
+            1 & \frac{1}{4} 
+        \end{bmatrix} = 
+        \begin{bmatrix}
+            \frac{1}{4} & 0 \\
+            \frac{3}{4} & \frac{1}{16} 
+        \end{bmatrix}
+    \]
+    \[
+        O_3 = \begin{bmatrix}
+            H \\ HF \\ HF^2
+        \end{bmatrix} = \begin{bmatrix}
+            0 & 1 \\
+            1 & \frac{1}{4} \\
+            \frac{3}{4} & \frac{1}{16}
+        \end{bmatrix}
+    \]
+    \[
+        R_3 = \begin{bmatrix}
+            G & FG & F^2G
+        \end{bmatrix} = \begin{bmatrix}
+            1 & \frac{1}{2} & \frac{1}{4} \\
+            0 & 1 & \frac{3}{4} \\
+        \end{bmatrix}
+    \]
+
+    \paragraph{Question 4} Compute the transfer function representation.
+
+    First method: direct manipulation of \gls{ss} equations
+    \[
+        \Sc:
+        \begin{cases}
+            x_1(t+1) &= \frac{1}{2}x_1(t) + u(t) \\
+            x_2(t+1) &= x_1(t) + \frac{1}{4}x_2(t) \\
+            y(t) &= x_2(t)
+        \end{cases}
+    \]
+    \begin{align*}
+        zx_1(t) - \frac{1}{2}x_1(t) = u(t) \qquad &\implies \qquad x_1(t) = \frac{1}{z-\frac{1}{2}}u(t) \\
+        zx_2(t) - \frac{1}{4}x_2(t) = \frac{1}{z-\frac{1}{2}}u(t) \qquad &\implies \qquad x_2(t) = \frac{1}{(z-\frac{1}{4})(z-\frac{1}{2})}u(t) \\
+        y(t) = \frac{1}{(z-\frac{1}{4})(z-\frac{1}{2})}u(t)
+    \end{align*}
+    \[
+        W(z) = \frac{1}{(z-\frac{1}{4})(z-\frac{1}{2})}
+    \]
+
+    There are 2 poles: $z=\frac{1}{4}$ and $z=\frac{1}{2}$. Since the system is fully observable and fully controllable, the poles correspond to the eigenvalues of $F$.
+
+    Second method: use the transformation \ref{t1}
+    \[
+        W(z) = H(zI-F)^{-1}G = \begin{bmatrix}
+            0 & 1
+        \end{bmatrix} \begin{bmatrix}
+            z-\frac{1}{2} & 0 \\
+            -1 & z-\frac{1}{4}
+        \end{bmatrix}^{-1} \begin{bmatrix}
+            1 \\ 0
+        \end{bmatrix} = \frac{1}{(z-\frac{1}{4})(z-\frac{1}{2})}
+    \]
+
+    \paragraph{Question 5} Write I/O time-domain representation
+
+    \[
+        y(t) = \frac{1}{z^2-\frac{3}{4}z+\frac{1}{8}}u(t) = \frac{z^{-2}}{1-\frac{3}{4}z^{-1}+\frac{1}{8}z^{-2}}u(t) = \frac{3}{4}y(t-1) - \frac{1}{8}y(t-2) + u(t-2)
+    \]
+
+    \paragraph{Question 6} Compute the first 6 values (including $\omega(0)$) of \gls{ir}.
+
+    We decide to compute it from the \gls{tf} $W(z) = \frac{z^{-2}}{1-\frac{3}{4}z^{-1}+\frac{1}{8}z^{-2}}$.
+    Performing the 5-step long division of $W(z)$ the result is $z^{-2}+\frac{3}{4}z^{-3}+\frac{7}{16}z^{-4}+\frac{15}{64}z^{-5}$.
+
+    \begin{align*}
+        \omega(0) &= 0 &
+        \omega(1) &= 0 &
+        \omega(2) &= 1 \\
+        \omega(3) &= \frac{3}{4} &
+        \omega(4) &= \frac{7}{16} &
+        \omega(5) &= \frac{15}{64}
+    \end{align*}
+
+    \textbf{Note} Also $\omega(1)=\omega(0)=0$: this means that the delay $k=2$, not just $1$.
+    
+    \paragraph{Question 7} Build the Hankel matrix and stop when the rank is not full (noise-free case).
+
+    \[
+        H_1 = \begin{bmatrix}
+            0
+        \end{bmatrix}
+        \qquad \rank (H_1) = 0
+    \]
+    \[
+        H_2 = \begin{bmatrix}
+            0 & 1 \\
+            1 & \frac{3}{4}
+        \end{bmatrix}
+        \qquad \rank (H_2) = 2
+    \]
+    \[
+        H_3 = \begin{bmatrix}
+            0 & 1 & \frac{3}{4} \\
+            1 & \frac{3}{4} & \frac{7}{16} \\
+            \frac{3}{4} & \frac{7}{16} & \frac{15}{64}
+        \end{bmatrix}
+        \qquad \rank (H_3) = 2 \neq 3
+    \]
+    $H_3$ is not full rank so the order of the system is 2 (as we already know). 
+    
+    It can be proved that $O_3R_3 = H_3$.
+\end{exa}
+
+\chapter{Parametric \acrlong{bb} system identification of I/O system using a frequency domain approach}
+
+So far we have seen:
+\begin{itemize}
+    \item In MIDA1 parametric black-box identification of I/O systems (\gls{armax}) and time series (\gls{arma})
+    \item In Chapter 1 non-parametric black-box identification of I/O systems (4SID)
+\end{itemize}
+
+The \textbf{frequency domain approach} is a black-box and \textbf{parametric} approach, and it's widely used in practice since it's very robust and reliable.
+
+Since it's parametric it uses the 4 usual steps:
+\begin{enumerate}
+    \item Experiment design and data pre-processing. A special type of experiment and data pre-processing is needed.
+    \item Selection of parametric model class ($\Mc(\theta)$)
+    \item Definition of a performance index ($J(\theta)$). A new special performance index is needed.
+    \item Optimization ($\hat{\theta} = \argmin_\theta J(\theta)$)
+\end{enumerate}
+
+Steps 1 and 3 are the special steps of this chapter.\\
+
+The general intuitive idea of the method is:
+\begin{itemize}
+    \item Make a set of ``single sinusoid'' (``single-tone'') excitation experiments
+    \item From each experiment estimate a single point of the frequency response of the system
+    \item Fit the estimated and modeled frequency response to obtain the optimal model
+\end{itemize}
+
+\clearpage
+
+\begin{exa}[Car steer dynamics]
+Let's picture a top-view of a car that is steering.
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+            \draw (0,0) rectangle ++(2,3.5);
+            
+            \draw (0.1,0.1) rectangle ++(0.3,1);
+            \draw (1.6,0.1) rectangle ++(0.3,1);
+
+            \draw[rotate around={30:(0,3.5)}] (-0.15,3) rectangle ++(0.3,1);
+            \draw[rotate around={30:(2,3.5)}] (1.85,3) rectangle ++(0.3,1);
+
+            \draw[dotted] (0,3.5) -- (0,5);
+            \draw[dotted, rotate around={30:(0,3.5)}] (0,3.5) -- (0,5);
+            \node at (-0.3,4.9) {$\delta_F$};
+
+            \draw (1,1.75) circle [radius=0.2];
+            \draw (1,2.05) -- (1,1.45);
+            \draw (0.7,1.75) -- (1.3,1.75);
+
+            \draw[->,>=stealth',semithick] (1.5,1.75) arc[radius=0.5, start angle=0, end angle=90];
+            \node at (1.5,2.25) {$\omega_z$};
+        \end{tikzpicture}
+    \end{figure}
+
+    where $\delta_F$ is the \emph{steer angle} (input) and $\omega_z$ is the \emph{rotational speed} around the vertical axis $z$.
+
+    This kind of dynamics relationship is very important for stability control systems design (ESC/ESP) and for autonomous cars.
+
+    There are 3 possible situations:
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+            \node[block] (sys1) {1};
+            \node[left of=sys1] (in1) {$\delta_F$};
+            \node[right of=sys1] (out1) {$\omega_z$};
+            \draw[->, red] (in1) -- (sys1);
+            \draw[->] (sys1) -- (out1);
+
+            \node[block, right of=sys1, node distance=4cm] (sys2) {2};
+            \node[left of=sys2] (in2) {$\delta_R$};
+            \node[above of=sys2, node distance=1cm] (in2b) {$\delta_F$};
+            \node[right of=sys2] (out2) {$\omega_z$};
+            \draw[->, red] (in2) -- (sys2);
+            \draw[->] (in2b) -- (sys2);
+            \draw[->] (sys2) -- (out2);
+
+            \node[block, right of=sys2, node distance=4cm] (sys3) {3};
+            \node[left of=sys3, yshift=0.3cm] (in3a) {$\delta_F$};
+            \node[left of=sys3, yshift=-0.3cm] (in3b) {$\delta_R$};
+            \node[right of=sys3] (out3) {$\omega_z$};
+            \draw[->, red] (in3a) -- (sys3);
+            \draw[->, red] (in3b) -- (sys3);
+            \draw[->] (sys3) -- (out3);
+        \end{tikzpicture}
+    \end{figure}
+
+    \begin{enumerate}
+        \item The control variable is the \emph{front steer} $\delta_F$. This is the case of autonomous cars
+        \item The human driver controls $\delta_F$ which is a measurable disturbance while the system controls the \emph{rear steer} $\delta_R$ 
+        \item Both $\delta_R$ and $\delta_F$ are control variables. This is the case of high-performance autonomous cars
+    \end{enumerate}
+    
+    Imagine performing an experiment in which the steer angle is driven with a sinusoid and the rotational speed is measured as the output.
+\end{exa}
+
+\section{Steps of the system identification with frequency domain method }
+
+\paragraph{Step 1} In the experiment design step we first have to select a set of \emph{excitation frequencies}.
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+        \draw[->] (0,0) -- (7.5,0) node[right] {$\omega$};
+        \draw (0,0.1) -- (0,-0.1) node[below] {$0$};
+        \draw (0.8,0.1) -- (0.8,-0.1) node[below] {$\omega_1$};
+        \draw (1.6,0.1) -- (1.6,-0.1) node[below] {$\omega_2$};
+        \draw (2.4,0.1) -- (2.4,-0.1) node[below] {$\omega_3$};
+        \draw (3.2,0.1) -- (3.2,-0.1) node[below] {$\ldots$};
+        \draw (4,0.1) -- (4,-0.1) node[below] {$\omega_H$};
+        \draw (4.8,0.1) -- (4.8,-0.1) node[below] {$\ldots$};
+        \draw (5.6,0.1) -- (5.6,-0.1) node[below] {$\omega_N$};
+        \draw (6.4,0.1) -- (6.4,-0.1) node[below] {$\ldots$};
+        \draw (7.2,0.1) -- (7.2,-0.1) node[below] {$\omega_S$};
+    \end{tikzpicture}
+\end{figure}
+
+where $\omega_S$ is the \emph{sampling frequency}, $\omega_N=\frac{1}{2}\omega_S$ is the \emph{Nyquist frequency} (which is the maximum frequency of a digital system) and $\omega_H$ is the maximum explored frequency.
+We have $\{\omega_1, \omega_2, \cdots, \omega_H\}$ usually evenly spaced ($\Delta \omega$ is constant).
+$\omega_H$ must be selected according to the bandwidth of the control system.
+
+We make $H$ \emph{independent} experiments.
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+        \node[block] (sys) {system};
+        \node[left of=sys, node distance=3cm] (in) {};
+        \node[left of=in] (in2) {};
+        \node[right of=sys, node distance=2.5cm] (out) {};
+        \node[right of=out, node distance=2cm] (out2) {};
+        \node[below of=in, node distance=0.5cm] {$u_1(t) = A_1\sin(\omega_1t)$};
+        \draw[xshift=-4cm,yshift=0.5cm,scale=0.2,domain=0:10,smooth,variable=\x] plot ({\x},{sin(\x*180/3.14)});
+
+        \node[below of=out, node distance=0.5cm] {$y_1(t)$};
+        \draw[xshift=1.5cm,yshift=0.5cm,scale=0.2,domain=0:10,smooth,variable=\x] plot ({\x},{1.5*sin(\x*180/3.14+30)});
+        \draw[->] (in2) -- (sys);
+        \draw[->] (sys) -- (out2);
+    \end{tikzpicture}
+    \caption*{Experiment \#1}
+\end{figure}
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+        \node[block] (sys) {system};
+        \node[left of=sys, node distance=3cm] (in) {};
+        \node[left of=in] (in2) {};
+        \node[right of=sys, node distance=2.5cm] (out) {};
+        \node[right of=out, node distance=2cm] (out2) {};
+        \node[below of=in, node distance=0.5cm] {$u_2(t) = A_2\sin(\omega_2t)$};
+        \draw[xshift=-4cm,yshift=0.5cm,scale=0.2,domain=0:10,smooth,variable=\x] plot ({\x},{sin(2*\x*180/3.14)});
+
+        \node[below of=out, node distance=0.5cm] {$y_2(t)$};
+        \draw[xshift=1.5cm,yshift=0.5cm,scale=0.2,domain=0:10,smooth,variable=\x] plot ({\x},{1.5*sin(2*\x*180/3.14+30)});
+        \draw[->] (in2) -- (sys);
+        \draw[->] (sys) -- (out2);
+    \end{tikzpicture}
+    \caption*{Experiment \#2}
+\end{figure}
+
+\vspace{-20pt}
+\[ \vdots \]
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+        \node[block] (sys) {system};
+        \node[left of=sys, node distance=3cm] (in) {};
+        \node[left of=in] (in2) {};
+        \node[right of=sys, node distance=2.5cm] (out) {};
+        \node[right of=out, node distance=2cm] (out2) {};
+        \node[below of=in, node distance=0.5cm] {$u_H(t) = A_H\sin(\omega_Ht)$};
+        \draw[xshift=-4cm,yshift=0.5cm,scale=0.2,domain=0:10,smooth,variable=\x,samples=50] plot ({\x},{sin(5*\x*180/3.14)});
+
+        \node[below of=out, node distance=0.5cm] {$y_H(t)$};
+        \draw[xshift=1.5cm,yshift=0.5cm,scale=0.2,domain=0:10,smooth,variable=\x,samples=50] plot ({\x},{1.5*sin(5*\x*180/3.14+30)});
+        \draw[->] (in2) -- (sys);
+        \draw[->] (sys) -- (out2);
+    \end{tikzpicture}
+    \caption*{Experiment \#$H$}
+\end{figure}
+
+
+\begin{rem}
+    The amplitudes $A_1$, $A_2$, \ldots, $A_H$ can be equal (constant) or, more frequently in practice, they decrease as the frequency increases to fulfill the power constraint on the input.
+
+    Indeed, if $\delta(t)$ is the steering angle (moved by an actuator), the requested steer torque is proportional to $\delta$: $T(t) = K \delta(t)$.
+    Therefore the steer power is proportional to $T(t) \dot{\delta}(t) = K \delta(t)\dot{\delta}(t)$.
+    If $\delta(t) = A_i\sin(\omega_it)$ the steering power is $KA_i\sin(\omega_it)\omega_iA_i\cos(\omega_it)$ which is proportional to $KA_i^2\omega_i$.
+
+    If we have a limit to this power, this power should be constant during the $H$ experiments, thus
+    \[KA_i^2\omega_i = \text{const} \qquad \implies \qquad A_i=\sqrt{\frac{\text{const}}{K\omega_i}}\]
+    and this means that the amplitude must be inversely proportional to the square root of the frequency.
+\end{rem}
+
+Focusing on the $i$-th experiment
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+        \node[block] (sys) {system};
+        \node[left of=sys, node distance=3cm] (in) {};
+        \node[left of=in] (in2) {};
+        \node[right of=sys, node distance=2.5cm] (out) {};
+        \node[right of=out, node distance=2cm] (out2) {};
+        \node[below of=in, node distance=0.5cm] {$u_i(t) = A_i\sin(\omega_it)$};
+        \draw[xshift=-4cm,yshift=0.5cm,scale=0.2,domain=0:10,smooth,variable=\x] plot ({\x},{sin(2*\x*180/3.14)});
+
+        \node[below of=out, node distance=0.5cm] {$y_i(t)$};
+        \draw[xshift=1.5cm,yshift=0.5cm,scale=0.2,domain=0:10,smooth,variable=\x,samples=100] plot ({\x},{1.5*sin(2*\x*180/3.14+30)+rand/3});
+        \draw[->] (in2) -- (sys);
+        \draw[->] (sys) -- (out2);
+    \end{tikzpicture}
+\end{figure}
+
+\begin{rem} [Frequency Response Theorem for LTI Systems]
+    If the system is LTI (linear time-invariant), the frequency response theorem says that if the input is a sine of frequency $\omega_i$, the output must be a sine with the same frequency $\omega_i$ but, in general, with a different amplitude and phase.
+\end{rem}
+
+However $y_i(t)$ in real applications is not a perfect sinusoid because of the following non-ideal behaviours:
+\begin{itemize}
+    \item Noise on output measurements
+    \item Noise on the system not directly related to measurements (e.g. roughness of the road).
+    \item (Small) non-linear effects (that we neglect, since we will use LTI local approximations of the system)
+\end{itemize}
+
+In pre-processing of I/O data we want to extract from $y_i(t)$ a perfect sinusoid of frequency $\omega_i$.
+We force the assumption that the system is LTI, so the output must be a pure sine wave of frequency $\omega_i$ (all the remaining signal is noise).
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+    
+        \draw[red, domain=0:8,smooth,variable=\x,samples=100,scale=0.6] plot ({\x},{1.5*sin(2*\x*180/3.14+30)});
+        
+        \draw[domain=0:8,smooth,variable=\x,samples=100,scale=0.6] plot ({\x},{1.5*sin(2*\x*180/3.14+30)+rand/3});
+        
+        \draw[] (5,1.2) --++ (1,0)
+            node[midway,above] {$y(t)$};
+        \draw[red] (5,0.5) --++ (1,0)
+            node[midway,above] {$\hat{y}(t)$};    
+    \end{tikzpicture}
+\end{figure}
+
+The model of the output signal is
+\[ \hat{y}_i(t) = B_i \sin(\omega_it+\phi_i) = a_i\sin(\omega_it) + b_i\cos(\omega_it) \]
+There are 2 unknowns: $B_i$ and $\phi_i$ (or $a_i$ and $b_i$).
+It's better to use the model with $a_i$ and $b_i$ since it's \emph{linear} in those parameters.
+
+The unknown parameters are $a_i$ and $b_i$ and we can find them by parametric identification.
+\[ \{ \hat{a}_i, \hat{b}_i \} = \argmin_{\{a_i, b_i\}} J_N(a_i, b_i) \]
+\[ J_N(a_i, b_i) = \frac{1}{N} \sum_{t=1}^N \left ( y_i(t) - \hat{y}_i(t) \right ) ^ 2 = \frac{1}{N} \sum_{t=1}^N \left( \underbrace{y_i(t)}_{\text{measurement}} \underbrace{- a_i\sin(\omega_it) - b_i\cos(\omega_it)}_\text{model output}\right)^2 \]
+
+Since the model is linear in $a_i$ and $b_i$, $J_N$, which is the \emph{sample variance of the modelling error}, is a \emph{quadratic function} of $a_i$ and $b_i$. Thus, we can solve the problem explicitly.
+
diff --git a/lectures/2022_04_20.tex b/lectures/2022_04_20.tex
new file mode 100644
index 0000000..e7c48fb
--- /dev/null
+++ b/lectures/2022_04_20.tex
@@ -0,0 +1,337 @@
+%!TEX root = ../main.tex
+
+\begin{align*}
+    \frac{\partial J_N}{\partial a_i} &= \frac{2}{N} \sum_{t=1}^N (-\sin(\omega_it))(y_i(t) - a_i\sin(\omega_it)-b_i\cos(\omega_it)) = 0 \\
+    \frac{\partial J_N}{\partial b_i} &= \frac{2}{N} \sum_{t=1}^N (-\cos(\omega_it))(y_i(t) - a_i\sin(\omega_it)-b_i\cos(\omega_it)) = 0
+\end{align*}
+
+This is a $2 \times 2$ linear system which can be written in matrix form
+
+\[
+    \begin{bmatrix}
+        \sum_{t=1}^N \sin(\omega_it)^2 & \sum_{t=1}^N \sin(\omega_it)\cos(\omega_it) \\
+        \sum_{t=1}^N \sin(\omega_it)\cos(\omega_it) & \sum_{t=1}^N \cos(\omega_it)^2
+    \end{bmatrix}
+    \begin{bmatrix}
+        a_i \\ b_i
+    \end{bmatrix} =
+    \begin{bmatrix}
+        \sum_{t=1}^N y_i(t)\sin(\omega_it) \\
+        \sum_{t=1}^N y_i(t)\cos(\omega_it)
+    \end{bmatrix}
+\]
+
+At this point we found $\left \{ \hat{a}_i, \hat{b}_i \right \}$ and now we prefer to go back to a \emph{sin-only} form ($B_i$, $\phi_i$) of the estimated sinusoid $\hat{y}_i(t)$:
+\[
+    \hat{B}_i\sin(\omega_it + \hat{\phi}_i) = \hat{B}_i\sin(\omega_it)\cos(\hat{\phi}_i) + \hat{B}_i\cos(\omega_it)\sin(\hat{\phi_i}) \stackrel{\text{!}}{=} \hat{a}_i\sin(\omega_it) + \hat{b}_i\cos(\omega_it)
+\]
+
+\[\Downarrow\]
+
+\[
+    \begin{cases}\label{sys:Bi}\tag{$\bigstar$}
+         \hat{B}_i\cos(\hat{\phi}_i) = \hat{a}_i  \\
+         \hat{B}_i\sin(\hat{\phi}_i) = \hat{b}_i 
+    \end{cases}
+\]
+
+\[\Downarrow\]
+
+\[
+    \frac{\hat{b}_i}{\hat{a}_i} = \frac{\sin\hat{\phi}_i}{\cos{\hat{\phi}_i}} = \tan(\hat{\phi_i}) \implies \hat{\phi}_i = \arctan \left( \frac{\hat{b}_i}{\hat{a}_i} \right)
+\]
+\[
+    \hat{B}_i = \frac{\frac{\hat{a}_i}{\cos\hat{\phi}_i} + \frac{\hat{b}_i}{\sin\hat{\phi}_i}}{2} \quad \text{average of the 2 equations in \eqref{sys:Bi}}
+\]
+
+Therefore, we have now found a bidirectional mapping $ \{ \hat{a}_i, \hat{b}_i \} \iff \{ \hat{B}_i, \hat{\phi}_i \}$ and so, an approximation of the measured output $y_i(t)$ which is $\hat{y}_i(t) = \hat{B}_i\sin(\omega_it + \hat{\phi}_i)$ for each frequency $\omega_i$. 
+
+Repeating the experiment and pre-processing for the $H$ experiments we can compute 
+
+\begin{align*}
+    \{ \hat{B}_1, \hat{\phi}_1 \} &\mapsto \frac{\hat{B}_1}{A_1} e^{j\hat{\phi}_1} \\
+    \{ \hat{B}_2, \hat{\phi}_2 \} &\mapsto \frac{\hat{B}_2}{A_2} e^{j\hat{\phi}_2} \\
+    \vdots& \\
+    \{ \hat{B}_H, \hat{\phi}_H \} &\mapsto \frac{\hat{B}_H}{A_H} e^{j\hat{\phi}_H} \\
+\end{align*}
+
+where each $\frac{\hat{B}_i}{A_i} e^{j\hat{\phi}_i}$ is a complex number that is the estimated point at frequency $\omega_i$ of the frequency response of the transfer function $W(z)$ from the input $u(t)$ to the output $y(t)$ of the system.
+
+\begin{rem}[Frequency Response Theorem for LTI systems]
+    If for a LTI system characterized by a \gls{tf} $W(z)$ the input is $A_i \sin (\omega_i t)$ and the output is $\hat{B}_i \sin (\omega_i t + \hat{\phi}_i)$, then 
+    \[ W(z=e^{j \omega_i}) = |W(e^{j \omega_i})| e^{j \angle W(e^{j \omega_i})} = \frac{\hat{B}_i}{A_i} e^{j\hat{\phi}_i} \]
+     is the corresponding frequency response of the system at that frequency ($\omega_i$).
+    
+    In particular, $\frac{\hat{B}_i}{A_i}$ is the ratio between the output and input amplitude and $\hat{\phi}_i$ is the phase shift.
+\end{rem}
+
+We can now plot these $H$ points in a classical \emph{Bode plot} exploiting that $|W(e^{j \omega_i})| = \frac{\hat{B}_i}{A_i}$ and that $\angle W(e^{j \omega_i}) = \hat{\phi}_i$.
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+        \draw[->] (0,0) -- (5,0) node[right] {$\omega$};
+        \draw[->] (0,-1.5) -- (0,3) node[left] {$|\cdot|$};
+        \draw[domain=0:4,mark=*,only marks,samples=8,variable=\x] plot ({\x},{5*log10(2/(1+((2^\x)^2)/100))});
+        \draw (4,0.1) -- ++(0,-0.2) node[below] {$\omega_H$};
+
+        \draw[->] (0,-3) -- (5,-3) node[right] {$\omega$};
+        \draw[->] (0,-4.5) -- (0,-1.7) node[left] {$\angle\cdot$};
+        \draw[domain=0:4,mark=*,only marks,samples=8,variable=\x] plot ({\x},{2*(atan(2/(1+((2^\x)^2)/100))/180*3.14)-5.2});
+        \draw (4,-2.9) -- ++(0,-0.2) node[below] {$\omega_H$};
+    \end{tikzpicture}
+\end{figure}
+
+At the end of \emph{step 1} we have a frequency-domain dataset ($H$ values) representing $H$ estimated points of the frequency response of the system.
+
+\paragraph{Step 2} Parametric model class (\gls{tf}) selection
+
+\[
+    \Mc(\theta): W(z; \theta) = \frac{b_0+b_1z^{-1}+\cdots+b_pz^{-p}}{1+a_1z^{-1}+\cdots+a_nz^{-n}}z^{-1}
+    \qquad
+    \theta = \begin{bmatrix}
+        a_1 \\ \vdots \\ a_n \\ b_0 \\ \vdots \\ b_p
+    \end{bmatrix}
+\]
+
+\begin{rem}[Model order selection]
+    In this case the order consists of 2 parameters, $n$ and $p$:
+    use cross-validation approach (or visual fitting in the Bode diagram) for finding the best choice of these parameters.
+\end{rem}
+
+\paragraph{Step 3} New performance index: variance of the error in frequency domain.
+
+\[
+    J_H(\theta) = \frac{1}{H} \sum_{i=1}^H \left | W(e^{j\omega_i}; \theta) - \frac{\hat{B}_i}{A_i}e^{j\hat{\phi}_i} \right | ^2
+\]
+
+where $W(e^{j\omega_i}; \theta)$ is the modeled frequency response and $\frac{\hat{B}_i}{A_i}e^{j\hat{\phi}_i}$ is the measured frequency response.
+
+\paragraph{Step 4} Optimization
+
+\[
+    \hat{\theta}_H = \argmin_\theta J_H(\theta)
+\]
+
+Usually $J_H(\theta)$ is a \emph{non-quadratic} and \emph{non-convex function}; thus, iterative optimization methods are needed.
+
+\paragraph{Conclusion} We have obtained the estimated model which is a \gls{tf}
+
+\[
+    \Mc(\hat{\theta}_H): W(z; \hat{\theta}_H)
+\]
+
+
+\begin{rem}[Frequency bandwidth selection $\omega_H =\; ?$]
+    Theoretically the standard best solution should be $H$ points distributed uniformly from 0 to $\omega_N$ (Nyquist freq.).
+
+    In practice it's better to concentrate the experimental effort in a smaller and more focused bandwidth.
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+            \draw[->] (0,0) -- (5,0) node[right] {$\omega$};
+            \draw (0,0.1) -- (0,-0.1) node[below] {$0$};
+            \draw (0.8,0.1) -- (0.8,-0.1) node[below] {$\omega_c$};
+            \draw (2,0.1) -- (2,-0.1) node[below] {$\omega_N$};
+            \draw (4,0.1) -- (4,-0.1) node[below] {$\omega_S$};
+        \end{tikzpicture}
+    \end{figure}
+
+    where $\omega_c$ is the \emph{expected cut-off frequency} of the closed-loop system.
+    
+    \textbf{Rule of thumb} $\omega_H \approx 3\omega_c$
+
+    \paragraph{Example} The Electronic Stability Control (ESC) system has an expected bandwidth of $\omega_c \approx 4 \text{Hz}$, so $\omega_H \approx 12\text{Hz}$.
+\end{rem}
+
+\begin{rem}[Emphasis on special frequency range]
+    In some cases, between $\omega_1$ and $\omega_H$, we want to be more accurate in system identification in some frequency regions (typically around the cut-off frequency or around resonances).
+    
+    %TODO: replace this img with tikz figure
+    \begin{figure}[H]
+        \centering
+        \includegraphics[scale=4.5]{./img/freq-emphasis.png}
+    \end{figure}
+
+    We can manage this selected focus on estimation precision using non-uniform weights $\lambda_i$ (different weights for different frequencies). Graphically:
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[]
+            \begin{axis}[axis lines=none,ymax=2.5]
+                \addplot[color=blue,mark=square]
+                    coordinates {
+                        (0,1)(1,1)(2,1)(3,1.5)(4,2)(5,1.5)(6,1)(7,1)(8,1)
+                    };
+            \end{axis}
+
+            \draw[->] (-0.5,0) -- (7,0) node[right] {$\omega$};
+            \draw[->] (0.5,-0.5) -- (0.5,5) node[left] {$\lambda_i$};
+            \node at (0.1,0.5) {$1$};
+            \draw (6.3,0.1) -- (6.3,-0.1) node [below] {$\omega_H$};
+            \draw (1.3,0.1) -- (1.3,-0.1) node [below] {$\omega_1$};
+            \draw (2,0.1) -- (2,-0.1) node [below] {$\omega_2$};
+            \draw (3.5,0.1) -- ++(0,-0.2) node [below] {$\omega_x$};
+        \end{tikzpicture}
+    \end{figure}
+ 
+    where $\omega_x$ is the frequency of interest.
+    
+    
+    The performance index can be redefined:
+    \[
+        \tilde{J}_H (\theta) = \frac{1}{H} \sum_{i=1}^H \lambda_i \left | W(e^{j\omega_i};\theta) - \frac{\hat{B}_i}{A_i}e^{j\hat{\phi}_i} \right | ^2
+    \]
+
+    Another \textbf{trick}: more dense $\omega_i$ spacing in the frequency region of special interest (not really used).
+\end{rem}
+
+\begin{rem}[Single experiment]
+    Sometimes the set of $H$ independent single-sinusoid experiments can be replaced by a long single ``sine-sweep'' experiment, that is to measure the frequency response of the system when the input is a sinusoid that starts with frequency $\omega_1$ and amplitude $A_1$ and ends with frequency $\omega_H$ and amplitude $A_H$.
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw[->] (-0.5,0) -- (5,0) node[right] {$t$};
+            \draw[->] (0,-1) -- (0,2) node[left] {$u(t)$};
+            \draw[domain=0:4.5,samples=100,smooth,variable=\x,blue] plot ({\x},{cos(\x*180/3.14*(\x^1.7+5))*e^(-\x/2.5)});
+        \end{tikzpicture}
+        \caption*{Slowly-varying sinusoid with increasing frequency and decreasing amplitude.}
+    \end{figure}
+
+    The output will be a single long signal $y(t)$.
+    
+    We can cut the signal a posteriori into $H$ pieces and then go back to the standard procedure,
+    or we can directly compute an estimate $\hat{W}(e^{j\omega})$ of the frequency response as the ratio of the output and input complex spectra %(recalling that $\Gamma_y(\omega) = |W(z)|^2 \Gamma_u(\omega)$)
+
+    \[
+        \hat{W}(e^{j\omega}) = \frac{\hat{\Gamma}_y(e^{j\omega})}{\hat{\Gamma}_u(e^{j\omega})}
+    \]
+
+    We can fit the estimated $\hat{W}(e^{j\omega})$ with the model frequency response $W(e^{j\omega}; \theta)$ in the performance index.
+    
+    \[
+        \frac{1}{H} \sum_{i=1}^H \left| W(e^{j \omega_i}; \theta) - \hat{W}(e^{j \omega_i}) \right|^2
+    \]
+    where, similarly, $W(e^{j\omega_i}; \theta)$ is the modeled frequency response and $\hat{W}(e^{j \omega_i})$ is the measured frequency response.
+    
+    This experiment is quicker but usually has a lower signal-to-noise ratio.
+\end{rem}
+
+\begin{rem}[Experiment on unstable system]
+    What happens if the system is \emph{open-loop} unstable? We have to stop the experiment because of the instability of the output (which of course diverges).
+    
+    \begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+        \node[block] (sys) {$\Sc$};
+        \node[left of=sys, node distance=3cm] (in) {};
+        \node[left of=in] (in2) {};
+        \node[right of=sys, node distance=2.5cm] (out) {};
+        \node[right of=out, node distance=2cm] (out2) {};
+
+        \draw[xshift=-4cm,yshift=0.5cm,scale=0.2,domain=0:4*pi,smooth,variable=\x] plot ({\x},{sin(8*\x r)});
+
+        \draw[xshift=1cm,yshift=0.5cm,scale=0.2,domain=0:4*pi,smooth,variable=\x,samples=100] plot ({\x},{sin(8*\x r + 10)*exp(0.15*\x)});
+        \draw[->] (in2) -- (sys);
+        \draw[->] (sys) -- (out2);
+    \end{tikzpicture}
+\end{figure}
+    
+    We can avoid this problem by performing the experiment in \emph{closed-loop}  and adding a \emph{stability controller}.
+    
+        \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \node [block,align=center] (stab) {stability\\controller};
+            \node [block] (sys) [right of=sum, node distance=2cm]{$\Sc$};
+            \node [coordinate] (split) [right of=sys, node distance=1cm]{};
+            \node [] (end) [right of=split, node distance=3cm]{$y(t)$};
+            \node [] (in) [left of=stab, node distance =4cm]{$\bar{y}(t)$};
+            \node [coordinate] (mid) [below of=sum, node distance=1.5cm] {};
+
+            \draw[->] (in) -- (stab);
+            \draw[->] (stab) -- (sys);
+            \draw (sys) edge (split);
+            \draw[->] (split) -- (end);
+            \draw (split) |- (mid);
+            \draw[->] (mid) -| (stab);
+            
+            \draw[xshift=-3.6cm, yshift=0.5cm, scale=0.2, domain=0:4*pi, smooth, variable=\x] plot ({\x}, {sin(8*\x r)});
+            \draw[blue, xshift=1cm, yshift=0.5cm, scale=0.2, domain=0:4*pi, smooth, variable=\x] plot ({\x}, {1.5*sin(8*\x r + 5)});
+            \draw[blue, xshift=4.5cm, yshift=0.5cm, scale=0.2, domain=0:4*pi, smooth, variable=\x] plot ({\x}, {0.7*sin(15*\x r + 7)});
+            
+        \end{tikzpicture}
+    \end{figure}
+    
+    where the blue sinusoids are the signals reflecting the dynamics of the system: once measured, they make up the I/O dataset of the system.
+    
+    \textbf{Note} that the model identified using that dataset will be a model of an unstable system.
+    
+    This is just a trick to collect data from an unstable system.
+\end{rem}
+
+
+
+\section{Comparison between time domain and frequency domain parametric methods}
+
+\paragraph{Frequency domain}
+\begin{description}
+    \item[Pro] Robust and very reliable because each experiment has a big signal-to-noise ratio (since we are forcing all the signal energy on a single clean sinusoid).
+    \item[Pro] Intuitive since it is easy to understand.
+    \item[Pro] Consistent with many control-design methods that work in the frequency domain.
+    \item[Cons] More demanding in terms of design of the experiment.
+    \item[Cons] Provides no noise model (unlike the \gls{pem} method of \gls{armax} system identification)
+    \end{description}
+
+\textbf{Note} that F.D. and T.D. methods should provide approximately the same result if done correctly.
+
+
+\chapter{Software (virtual) Sensing model-based in feedback method using Kalman Filter framework}
+
+In MIDA1 we have mostly used I/O \acrfull{tf} representations:
+\[ y(t) = \frac{B(z)}{A(z)}u(t-k) + \frac{C(z)}{A(z)}e(t) \qquad e(t) \sim \WN \]
+
+
+Kalman filter theory is fully based on \acrfull{ss} representation
+
+\[
+    \begin{cases}
+        x(t+1) = Fx(t) + Gu(t) + v_1(t)  & \qquad v_1 \sim \WN \\
+        y(t) = Hx(t) + \cancel{Du(t)} + v_2(t) & \qquad v_2 \sim \WN
+    \end{cases}
+\]
+
+where we assume that the system model is given (typically obtained in a white-box approach): it's not a system identification problem! Indeed we are interested in the internal state of the system.
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+        \node [block, align=center] (sys) {$\Sc$ \\ \vspace{10pt} \qquad \\ \qquad$x(t)$};
+        \node (in) [left of=sys, node distance=2cm] {$u(t)$};
+        \node (out) [right of=sys, node distance=2cm]{$y(t)$};
+        \node (wn1) [above of=sys, node distance=1.5cm, xshift=-1cm] {$v_1(t)$};
+        \node (wn2) [above of=sys, node distance=1.5cm, xshift=1cm] {$v_2(t)$};
+
+        \draw[->] (in) -- (sys);
+        \draw[->] (sys) -- (out);
+        \draw[->] (wn1) -- (sys);
+        \draw[->] (wn2) -- (sys);
+    \end{tikzpicture}
+    \caption*{Two-noises model}
+\end{figure}
+
+
+\section{Motivation and Goals of Kalman Filter}
+
+Given a model description $\{F, G, H\}$ and noises variances (that's why it is not a system identification technique), with \acrfull{kf} theory we can address the following problems:
+
+\begin{itemize}
+    \item Find the $k$-steps ahead prediction of output: $\hat{y}(t+k|t)$ (already solved in MIDA1 with \gls{armax}).
+    \item Find the $k$-steps ahead prediction of state: $\hat{x}(t+k|t)$.
+    \item Find the filter of the state at present time: $\hat{x}(t|t)$. In practice it's a \emph{software-sensing} problem, which is the most important problem solved by the Kalman filter (this is why it is named Kalman \emph{filter}).
+    \item Gray box system identification (see \nameref{ch5}). We have a recorded data-set and the model structure with some unknown parameters.
+\end{itemize}
+
+
diff --git a/lectures/2022_04_21.tex b/lectures/2022_04_21.tex
new file mode 100644
index 0000000..6a225eb
--- /dev/null
+++ b/lectures/2022_04_21.tex
@@ -0,0 +1,560 @@
+%!TEX root = ../main.tex
+
+Dynamical systems have this layout:
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+        \node [block, align=center] (sys) {$x_1(t)$\\$x_2(t)$\\$\vdots$\\$x_n(t)$};
+        \node (in1) [left of=sys, node distance=2cm, yshift=0.7cm] {$u_1(t)$};
+        \node (in2) [left of=sys, node distance=2cm] {$u_2(t)$};
+        \node (dots1) [left of=sys, node distance=2cm, yshift=-0.5cm]{$\vdots$};
+        \node (in3) [left of=sys, node distance=2cm, yshift=-1cm] {$u_m(t)$};
+        \node (out1) [right of=sys, node distance=2cm, yshift=0.7cm]{$y_1(t)$};
+        \node (out2) [right of=sys, node distance=2cm]{$y_2(t)$};
+        \node (dots2) [right of=sys, node distance=2cm, yshift=-0.5cm]{$\vdots$};
+        \node (out3) [right of=sys, node distance=2cm, yshift=-1cm]{$y_p(t)$};
+
+        \draw[->] (in1) -- (sys);
+        \draw[->] (in2) -- (sys);
+        \draw[->] (in3) -- (sys);
+        \draw[->] (sys) -- (out1);
+        \draw[->] (sys) -- (out2);
+        \draw[->] (sys) -- (out3);
+    \end{tikzpicture}
+\end{figure}
+
+where, in this case, the system is MIMO: it has $m$ inputs (actuators/control variables), $p$ outputs (sensors) and $n$ states.
+
+\textbf{Key problem} usually $p \ll n$, that is, there are far fewer physical sensors than system states, because of:
+\begin{itemize}
+    \item cost
+    \item cables, power supply, installation
+    \item maintenance (faults, degradation)
+    \item existence of such sensors
+\end{itemize}
+
+That's why usually not all the states are measured (available). However, it is very useful to have full ``measurement'' of states because:
+\begin{itemize}
+    \item design of control algorithms (state feedback design)
+    \item monitoring of the system (fault detection, predictive maintenance, \dots)
+\end{itemize}
+
+This problem can be solved with \emph{software sensing (SW-sensing)} (also called virtual sensing) algorithms:
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+        \node [block, align=center] (sys) {$\Sc$\\\qquad$x(t)$};
+        \node (in1) [left of=sys, node distance=2cm, yshift=0.7cm] {};
+        \node (in2) [left of=sys, node distance=2.3cm] {$u(t)$};
+        \node (in3) [left of=sys, node distance=2cm, yshift=-0.7cm] {};
+        \node (out1) [right of=sys, node distance=2cm, yshift=0.7cm]{};
+        \node (out2) [right of=sys, node distance=2.3cm]{$y(t)$};
+        \node (out3) [right of=sys, node distance=2cm, yshift=-0.7cm]{};
+
+        \node [block, align=center, below of=sys] (algo) {SW-sensing algorithm};
+        \node (algoout) [right of=algo, node distance=3cm] {$\hat{x}(t)$};
+        
+        \draw[<-, dashed] (sys.north) --++ (0,1)
+            node[right, align=center] {immeasurable \\ disturbances};
+        \draw[->] (in1) -- (sys);
+        \draw[->] (in2) -- (sys);
+        \draw[->] (in3) -- (sys);
+        \draw[->] (sys) -- (out1);
+        \draw[->] (sys) -- (out2);
+        \draw[->] (sys) -- (out3);
+
+        \draw[->] (-1.4,0.8) -- (-1.4,-1) -- (algo);
+        \draw[->] (1.4,0.8) -- (1.4,-1) -- (algo);
+        \draw[->] (algo) -- (algoout);
+    \end{tikzpicture}
+\end{figure}
+
+where $\hat{x}(t)$ is an estimation (or SW-sensing) of the internal state $x(t)$.
+
+\paragraph{Designer Dilemma} When should we use software sensing and when physical sensing?
+\begin{itemize}
+    \item In some cases there is no option (not feasible installation of a physical sensor)
+    \item In most cases both options are viable: variable vs fixed cost
+\end{itemize}
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+        \draw[->] (-0.5,0) -- (4,0) 
+            node[right, align=center] {Fixed development costs\\of software sensing algorithm}
+            node[right, above] {€}; 
+        \draw[->] (0,-0.5) -- (0,4) 
+            node[left, align=right] {Variable cost proportional\\to the number of sensors}
+            node[above, right] {€};
+        \draw[domain=0:2.5,samples=20,smooth,variable=\x,red] plot ({\x},{sqrt(2.5^2-\x^2)});
+        
+        \draw[red] (-0.1,2.5) --++ (0.2, 0) node[left, xshift=-0.2cm] {\color{red} break-even};
+        \draw (2.5,0.1) --++ (0, -0.2) node[below] {$c$};
+        
+        \draw [decorate,decoration={brace,amplitude=10pt}] (0,0) -- (0,2.5) node [black,midway,align=right,xshift=-0.3cm] {Low volumes production:\\better physical sensing};
+        \draw [decorate,decoration={brace,amplitude=7pt}] (0,3.5) -- (0,2.5) node [black,midway,align=left,xshift=0.3cm] {High volumes production:\\better software sensing};
+    \end{tikzpicture}
+    \caption*{Break-even analysis}
+\end{figure}
+
+Given $c$, the fixed cost of development of the software sensor, it is possible to compute the break-even point.
+The break-even point coincides with the number of products such that above that number it is more convenient to use software sensing, while below that number it is more convenient to buy and install physical sensors per each product.
+
+Anyway, in some cases we might use both physical and software sensing for redundancy (e.g. in very safety-critical or mission-critical applications).
+
+\paragraph{Key questions for software sensing}
+
+\begin{itemize}
+    \item Is software-sensing feasible? Test is the observability of the states from measured outputs (through physical sensors).
+    \item If feasible, check if the level of noise (error) of the estimated variable is acceptable.
+\end{itemize}
+
+\begin{exa}[Slip estimation for ABS/traction control]
+Now we go back to the ABS example (described in \ref{abs_ex}) where we realized that SW-sensing is needed for the estimation of $v$, the horizontal velocity of the car, since it cannot be measured with a physical sensor.
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+            \draw (0.5,0) -- (0,0) -- (0,0.5) -- (1,0.5) -- (1.5,1) -- (2.5,1) -- (2.8,0.5) -- (3.3,0.5) -- (3.3,0) -- (3.1,0);
+            \draw (1.1,0) -- (2.5,0);
+            \draw (0.8,0) circle (0.3);
+            \draw (2.8,0) circle (0.3);
+
+            \draw[->] (-0.1,0.5) -- (-0.6,0.5) node[left] {$v$};
+            \draw[->] (0.8,0) -- (0.8,-0.3) node[below] {$r$};
+            \draw[->] (1.146,0.2) arc[radius=0.4, start angle=30, end angle=90];
+            \node at (1.3,0.5) {$\omega$};
+        \end{tikzpicture}
+    \end{figure}
+
+    
+    Indeed, measure of $v$ cannot be done with an optical sensor or a GPS:
+    they both have a problem of availability (not guaranteed). Physical sensing is not an option for industrial production.
+
+    Intuitive solution: install a longitudinal accelerometer ($a_x$) and integrate.
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+            \draw (0.5,0) -- (0,0) -- (0,0.5) -- (1,0.5) -- (1.5,1) -- (2.5,1) -- (2.8,0.5) -- (3.3,0.5) -- (3.3,0) -- (3.1,0);
+            \draw (1.1,0) -- (2.5,0);
+            \draw (0.8,0) circle (0.3);
+            \draw (2.8,0) circle (0.3);
+
+            \draw[->] (2.3,0.5) -- (1.7,0.5) node[above] {$a_x$};
+        \end{tikzpicture}
+    \end{figure}
+    \[
+        \hat{v} = \int a_x(t) dt \qquad \iff \qquad a_x(t) \rightarrow 1/s \rightarrow \hat{v}(t)
+    \]
+
+    In discrete time domain: discretization using approximation of derivative, forward Euler method (see \nameref{appendix:discr})
+    \[
+        \frac{d}{dt} v(t) = a_x(t) \qquad \frac{dv(t)}{dt} \approx \frac{v(t+1)-v(t)}{\Delta T_s} = a_x(t)
+    \]
+
+    Where $\Delta T_s$ is the sampling interval (e.g. 10ms).
+
+    \[
+        \hat{v}(t) = \hat{v}(t-1) + \Delta T_s a_x(t-1)
+    \]
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \node [block] (sys) {$\frac{\Delta T_s}{1-z^{-1}}$};
+            \node (in) [left of=sys, node distance=2cm] {};
+            \node (end) [right of=sys, node distance=2cm]{};
+
+            \draw[->] (in) edge node {$a_x(t)$} (sys);
+            \draw[->] (sys) edge node {$\hat{v}(t)$} (end);
+        \end{tikzpicture}
+    \end{figure}
+
+    Unfortunately the measured signal is not $a_x(t)$ but $a_x(t)+d_{a_x}(t)$.
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \begin{axis}[axis lines=none,ymax=6]
+                \addplot[color=blue,smooth]
+                    coordinates {
+                        (0.00,2.35)(0.40,2.87)(0.80,2.99)(1.20,2.75)(1.60,2.83)(2.00,2.86)(2.40,3.19)(2.80,2.76)(3.20,2.45)(3.60,2.85)(4.00,2.55)(4.40,2.50)(4.80,2.64)(5.20,2.72)(5.60,3.26)(6.00,3.68)(6.40,3.95)(6.80,3.9)(7.20,4.00)(7.60,3.51)
+                    };
+                \addplot[color=red,smooth]
+                    coordinates {
+                        (0.00,2.35)(0.40,2.91)(0.80,3.07)(1.20,2.87)(1.60,2.99)(2.00,3.06)(2.40,3.43)(2.80,3.04)(3.20,2.77)(3.60,3.21)(4.00,2.95)(4.40,2.94)(4.80,3.12)(5.20,3.24)(5.60,3.82)(6.00,4.28)(6.40,4.59)(6.80,5.17)(7.20,5.0)(7.60,5.27)
+                    };
+                \draw[->] (-0.5,0) -- (800,0) node[right] {$t$};
+                \draw[->] (0.5,-0.5) -- (0.5,300) node[left] {};
+            \end{axis}
+        \end{tikzpicture}
+    \end{figure}
+
+    Integrating noise generates a \emph{drift}. The integrator is not an asymptotically stable system.
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \node [block] (sys) {$\frac{\Delta T_s}{1-z^{-1}}$};
+            \node [sum] (in) [left of=sys, node distance=1.5cm] {};
+            \node (in1) [above of=in, node distance=1cm] {$d_{a_x}(t)$};
+            \node (in2) [below of=in, node distance=1cm] {$a_x(t)$};
+            \node (end) [right of=sys, node distance=2cm]{};
+
+            \draw[->] (in) -- (sys);
+            \draw[->] (in1) -- (in) node[left, pos=0.8] {$+$};
+            \draw[->] (in2) -- (in) node[left, pos=0.8] {$+$};
+            \draw[->] (sys) edge node {$\hat{v}(t)$} (end);
+        \end{tikzpicture}
+    \end{figure}
+
+    \paragraph{Solution} Use a Kalman Filter.
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \node [block, align=center, minimum width=3cm] (sys) {car};
+            \node [block, align=center, minimum width=3cm, below of=sys] (algo) {Kalman Filter};
+            \node (algoout) [right of=algo, node distance=3cm] {$\hat{v}(t)$};
+
+            \draw[->,transform canvas={xshift=-1.2cm}] (sys) -- (algo) node[pos=0.5] {$\omega_1$};
+            \draw[->,transform canvas={xshift=-0.6cm}] (sys) -- (algo) node[pos=0.5] {$\omega_2$};
+            \draw[->] (sys) -- (algo) node[pos=0.5] {$\omega_3$};
+            \draw[->,transform canvas={xshift=0.6cm}] (sys) -- (algo) node[pos=0.5] {$\omega_4$};
+            \draw[->,transform canvas={xshift=1.2cm}] (sys) -- (algo) node[pos=0.5] {$a_x$};
+
+            \draw[->] (algo) -- (algoout);
+        \end{tikzpicture}
+    \end{figure}
+\end{exa}
+
+\begin{exa}[State of charge estimation of a battery]
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw (0,0) rectangle ++(1.5cm,3cm);
+            \draw (0.6cm,3cm) rectangle ++(0.3,0.15);
+
+            \draw [pattern=north west lines, pattern color=green] (0,0) rectangle (1.5,2);
+
+            \draw [decorate,decoration={brace,amplitude=10pt}] (0,0) -- (0,3) node [black,midway,align=right,xshift=-0.3cm] {100\%};
+            \draw [decorate,decoration={brace,amplitude=10pt}] (1.5,2) -- (1.5,0) node [black,midway,align=right,xshift=0.3cm] {SoC};
+
+            \node [block, align=center] (sys) at (6,1.5) {SoC\\internal\\state};
+            \node [left of=sys, node distance=2cm] (in1) {$i(t)$};
+            \node [above of=sys, node distance=2cm] (in2) {$T(t)$};
+            \node [right of=sys, node distance=2cm] (out) {$v(t)$};
+
+            \draw[->] (in1) -- (sys);
+            \draw[->] (in2) -- (sys);
+            \draw[->] (sys) -- (out);
+        \end{tikzpicture}
+    \end{figure}
+    \[
+        \text{SoC}(t) = 1 - \frac{\int i(t)dt}{I} \qquad 0 \le \text{SoC} \le 1
+    \]
+    Where $I$ is the total amount of \emph{current} that can be extracted by the user of the battery.
+    This solution is not feasible since it integrates the noise on $i(t)$.
+\end{exa}
+
+\section{Kalman Filter on Basic Systems}
+
+\begin{itemize}
+    \item No external inputs ($\cancel{Gu(t)}$): time series
+    \item Linear systems
+    \item Time invariant systems
+\end{itemize}
+
+The basic solution of this basic system is the 1-step prediction.\\
+After that, we will make the extensions to more general systems:
+\begin{itemize}
+    \item k-step prediction
+    \item filter $\hat{x}(t|t)$
+    \item time-varying systems
+    \item systems with exogenous inputs (presence of ${Gu(t)}$)
+    \item non-linear systems (Extended Kalman Filter (EKF))
+\end{itemize}
+
+\subsection{Detailed description of Basic System}
+
+The basic system we initially consider is a MIMO system with $n$ states, ($m$ inputs) and $p$ outputs.
+
+\[
+    \Sc: \begin{cases}
+        x(t+1) = Fx(t) + \cancel{Gu(t)} + v_1(t) & \text{state equation}\\
+        y(t) = Hx(t) + \cancel{Du(t)} + v_2(t) & \text{output equation}
+    \end{cases}
+\]
+\[
+    x(t) = \begin{bmatrix}
+        x_1(t) \\
+        x_2(t) \\
+        \vdots \\
+        x_n(t)
+    \end{bmatrix}
+    \qquad
+    \left(\;u(t) = \begin{bmatrix}
+        u_1(t) \\
+        u_2(t) \\
+        \vdots \\
+        u_m(t)
+    \end{bmatrix}\;\right)
+    \qquad
+    y(t) = \begin{bmatrix}
+        y_1(t) \\
+        y_2(t) \\
+        \vdots \\
+        y_p(t)
+    \end{bmatrix}
+\]
+
+\begin{description}
+    
+    \vspace{15pt}
+    
+    \item [State Noise] $v_1(t)$ is a vector white-noise.
+    \[
+        v_1(t) \sim \WN(0, V_1) \qquad v_1(t) = \begin{bmatrix}
+            v_{11}(t) \\
+            v_{12}(t) \\
+            \vdots \\
+            v_{1n}(t)
+        \end{bmatrix}
+    \]
+    
+    and it is called \emph{state noise} or \emph{model noise}.
+    It is used to model immeasurable noises affecting the system and small modelling errors.
+    
+    \textbf{Properties of $v_1(t)$}:
+    \begin{enumerate}
+        \item $\EE[v_1(t)] = \vec{0}$
+        \item $\EE[v_1(t) \cdot v_1\transpose(t)] = V_1$, where $V_1$ is an $n\times n$ covariance matrix (square, symmetric and semi-definite positive by definition)
+        \item $\EE[v_1(t) \cdot v_1\transpose(t-\tau)] = [0]_{n \times n} \quad \forall t \quad \forall \tau \ne 0$ (\emph{whiteness} property)
+    \end{enumerate}
+    
+    \vspace{15pt}
+    
+    \item [Output Noise] $v_2(t)$ is a vector white-noise.
+    \[
+        v_2(t) \sim \WN(0, V_2) \qquad v_2(t) = \begin{bmatrix}
+            v_{21}(t) \\
+            v_{22}(t) \\
+            \vdots \\
+            v_{2p}(t)
+        \end{bmatrix}
+    \]
+    
+    and it is called \emph{output noise} or \emph{sensor noise}. It is the noise affecting the output sensor measurements.
+    
+    \textbf{Properties of $v_2(t)$}:
+    \begin{enumerate}
+        \item $\EE[v_2(t)] = \vec{0}$
+        \item $\EE[v_2(t) \cdot v_2\transpose(t)] = V_2$, where $V_2$ is a $p\times p$ covariance matrix (square, symmetric and semi-definite positive\footnote{We make the \textbf{assumption that $V_2$ is definite positive} (i.e. $V_2 > [0]_{p \times p}$) because we will need this property in the Riccati equation.} by definition)
+        \item $\EE[v_2(t) \cdot v_2\transpose(t-\tau)] = [0]_{p \times p} \quad \forall t \quad \forall \tau \ne 0$ (\emph{whiteness} property)
+    \end{enumerate}
+
+\end{description}
+
+\textbf{Assumptions} about the relationships between $v_1(t)$ and $v_2(t)$:
+\[
+    \EE[v_1(t) \cdot v_2\transpose(t-\tau)] = \begin{cases} \label{sys:corrV12} \tag*{$\clubsuit$}
+        [0]_{n \times p} & \text{if } \tau \ne 0 \\
+        V_{12} & \text{if } \tau = 0
+    \end{cases}
+\]
+
+where $V_{12}$ is a cross-correlation matrix of size $n\times p$.
+
+The system \eqref{sys:corrV12} means that $v_1$ and $v_2$ can be correlated only at the same time, but, in practice, $V_{12}=0$ is the most common assumption. Thus, in practice
+
+\[\EE[v_1(t) \cdot v_2\transpose(t-\tau)] = [0]_{n \times p} \quad \forall t \forall \tau  \qquad \qquad \text{($1^{st}$ assumption)} \]
+
+Since the system $\Sc$ is dynamic we need to define the (in this case, probabilistic) initial conditions:
+\[
+    \EE[x(1)] = \underbrace{X_0}_{n\times 1} \qquad \EE[\left(x(1) - X_0\right)\left(x(1)-X_0\right)\transpose] = \underbrace{P_0}_{n\times n} \ge [0]_{n \times n}
+\]
+
+If the covariance matrix $P_0 = [0]_{n \times n}$ the initial state is perfectly known.
+
+Finally we assume that the two noises $v_1(t)$ and $v_2(t)$ are uncorrelated with the initial state:
+\[
+    x(1) \perp v_1(t) \qquad x(1) \perp v_2(t) \qquad \qquad \text{($2^{nd}$ assumption)}
+\]
+
+\textbf{Note} From now on, \emph{null matrices} $[0]_{n \times m}$ will be simply denoted by $0$. 
+
+\subsection{KF Basic Solution of the Basic System}\label{subsec:KF-basic_sol}
+
+%\renewcommand\nameeq[2]{\phantom{\text{#2}}&&&\text{#2}}
+
+\begin{flalign}\label{eq:KF-state}
+    \nameeq{\hat{x}(t+1|t) = F\hat{x}(t|t-1) + K(t)e(t)}{State eq.}
+\end{flalign}    
+
+\begin{flalign}\label{eq:KF-out}    
+    \nameeq{\hat{y}(t|t-1) = H\hat{x}(t|t-1)}{Output eq.}
+\end{flalign} 
+
+\begin{flalign}\label{eq:KF-pred-err}    
+    \nameeq{e(t) = y(t) - \hat{y}(t|t-1)}{Prediction output error eq.}
+\end{flalign} 
+
+\begin{flalign}\label{eq:KF-gain}    
+    \nameeq{K(t) = \left( FP(t)H\transpose+V_{12} \right) \left( HP(t)H\transpose+V_2 \right)^{-1}}{Gain of the filter}
+\end{flalign} 
+
+\begin{flalign}\label{eq:KF-DRE}    
+    \nameeq{P(t+1) = \left( FP(t)F\transpose + V_1 \right) - \left( FP(t)H\transpose + V_{12} \right)\left( HP(t)H\transpose + V_{2} \right)^{-1}\left( FP(t)H\transpose + V_{12} \right)\transpose}{\acrshort{dre}}
+\end{flalign}  
+    
+%\begin{align*}
+%    & \hat{x}(t+1|t) = F\hat{x}(t|t-1) + K(t)e(t) && \qquad \text{state equation} \\
+%    & \hat{y}(t|t-1) = H\hat{x}(t|t-1) &&\qquad \text{output equation} \\
+%    & e(t) = y(t) - \hat{y}(t|t-1) &&\qquad \text{output prediction error} \\
+%    & K(t) = \left( FP(t)H\transpose+V_{12} \right) \left( HP(t)H\transpose+V_2 \right)^{-1} &&\qquad \text{gain of the K.F.} \\
+%    & P(t+1) = \left( FP(t)F\transpose + V_1 \right) + &&\\
+%    & - \left( FP(t)H\transpose + V_{12} \right)\left( HP(t)H\transpose + V_{2} \right)^{-1}\left( FP(t)H\transpose + V_{12} \right)\transpose && \qquad\text{difference Riccati equation}
+%\end{align*}
+
+Since \ref{eq:KF-state} and \ref{eq:KF-DRE} are dynamical equations, two initial conditions are needed:
+
+\begin{flalign}\label{eq:KF-initCond-state}    
+    \nameeq{\hat{x}(1|0) = \EE[x(1)] = X_0}{Init. condition for \ref{eq:KF-state}}
+\end{flalign}  
+
+\begin{flalign}\label{eq:KF-initCond-DRE}    
+    \nameeq{P(1) = \text{var}[x(1)] = P_0}{Init. condition for \ref{eq:KF-DRE}}
+\end{flalign}  
+
+\begin{defn}[Difference Riccati Equation (DRE)]
+    The equation \ref{eq:KF-DRE} is called \emph{\acrfull{dre}} and it is a special type of non-linear matrix difference equation. 
+    
+    \quad \textbf{Note} The \gls{dre} is an autonomous (i.e. there are no inputs), non-linear, discrete time, multi-variable system, described by a non-linear difference matrix equation
+    \[
+         P(t+1) = f_{NL}(P(t)) \qquad P(1) = P_0
+    \]
+\end{defn}
+
+
+\begin{rem}[Structure of $K(t)$ and \gls{dre}]
+    Notice that $K(t)$ and the \gls{dre} have  a \emph{block-structure} having this form: 
+    
+    \[ AP(t)B\transpose+N  \qquad \text{where $N$ is a noise matrix}\]
+
+    There are 3 different types of blocks:
+    \begin{align*}
+        \texttt{state:} \qquad& FP(t)F\transpose+V_1 \qquad\text{\small{(since $F$ refers to \ref{eq:KF-state})}}\\
+        \texttt{output:} \qquad& HP(t)H\transpose+V_2 \qquad\text{\small{(since $H$ refers to \ref{eq:KF-out})}}\\
+        \texttt{mix:} \qquad& FP(t)H\transpose+V_{12}
+    \end{align*}
+
+    Therefore
+    \begin{align*}
+        \text{\ref{eq:KF-gain} becomes:} \qquad& K(t) \equiv (\texttt{mix})(\texttt{output})^{-1} \\
+        \text{\ref{eq:KF-DRE} becomes:} \qquad& P(t+1) \equiv (\texttt{state}) - (\texttt{mix})(\texttt{output})^{-1}(\texttt{mix})\transpose
+    \end{align*}
+\end{rem}
+
+
+\begin{rem}[Existence of the \gls{dre}]
+    In order to guarantee the existence of the \gls{dre} for all time instants $t$, the only critical part is the inversion of the \texttt{output} block:
+    \[
+        \underbrace{(\underbrace{HP(t)H\transpose}_{\ge 0} + \underbrace{V_2}_{>0}}_{>0})^{-1} \qquad \text{thanks to $V_2>0$,  \texttt{output} is always invertible}
+    \]
+\end{rem}
+
+\begin{rem}[Meaning of $P(t)$]
+    The symmetric $n \times n$ matrix $P(t)$ has a very important meaning, indeed
+
+    \begin{align*}
+        P(t) = \EE[(x(t) - \hat{x}(t|t-1))(x(t) - \hat{x}(t|t-1))\transpose] 
+        &= \text{Var}[x(t) - \hat{x}(t|t-1)] \\
+        &= \text{Var}[e_x(t)]     
+    \end{align*}
+
+    Therefore, $P(t)$ is the covariance matrix of the 1-step prediction error of the state $x(t)$.
+\end{rem}
+
+
+\subsection{Block-scheme representation of the Kalman Filter}
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+        \node [block] (z1) at (1,6) {$z^{-1}$};
+        \node [block] (F1) at (1,5) {$F$};
+        \node [block] (K) at (1,3) {$K(t)$};
+        \node [block] (z2) at (1,1) {$z^{-1}$};
+        \node [block] (F2) at (1,0) {$F$};
+
+        \node [block] (H1) at (4,6) {$H$};
+        \node [block] (H2) at (4,1) {$H$};
+
+        \node [sum] (sum1) at (6,6) {};
+        \node [sum] (sum2) at (7,3) {};
+        \node [sum] (sum3) at (-1.5,6) {};
+        \node [sum] (sum4) at (-1.5,1) {};
+        
+        \node (v1) at (-1.5,7) {$v_1(t)$};
+        \node (v2) at (6,7) {$v_2(t)$};
+        \node[right] (yt) at (9,6) {$y(t)$};
+        \node[right] (yhat) at (9,1) {$\hat{y}(t|t-1)$};
+        \node[right] (xhat) at (9,0) {$\hat{x}(t|t-1)$};
+
+        \draw[->] (v1) -- (sum3);
+        \draw[->] (sum3) -- (z1) node[midway, above] {$x(t+1)$};
+        \draw[->] (F1) -| (sum3);
+        \draw[->] (z1) -- (H1) node[midway, above] (xt) {$x(t)$};
+        \draw[->] (H1) -- (sum1);
+        \draw[->] (v2) -- (sum1);
+        \draw[->] (xt.south) |- (F1);
+        \draw[->,red,line width=0.3mm] (K) -| (sum4)
+            node[pos=0.3, above] {\emph{feedback}};
+        \draw[->] (sum4) -- (z2) node[midway, above] {$\hat{x}(t+1|t)$};
+        \draw[->] (sum1) -- (yt) node[midway] (yt_c) {};
+        \draw[->] (yt_c.south) -| (sum2) node[right, pos=0.95] {$+$};
+        \draw[<-,red,line width=0.3mm] (K) -- (sum2) node[midway, above,black] {$e(t)$};
+        \draw[->] (z2) -- (H2) node[midway, above] {$\hat{x}(t|t-1)$};
+        \draw[->] (F2) -| (sum4);
+        \draw[->] (2.5,1) |- (F2);
+        \draw[->] (H2) -- (yhat);
+        \draw[->,red,line width=0.3mm] (7,1) -- (sum2) node[right, pos=0.8] {$-$};
+        \draw[->] (F2) -- (xhat);
+        
+        \draw[dashed] ($(sum3) + (-0.5, -1.5)$) rectangle ($(v2) + (0.5, 0.5)$)
+            node[left=0.5cm of v1] {$\Sc$:};
+        
+        \draw[dashed, blue] ($(sum4) + (-0.5, -1.5)$) rectangle ($(sum2) + (0.5, 0.8)$)
+            node[left=2.7cm of K] {$\mathcal{KF}$:};    
+    \end{tikzpicture}
+\end{figure}
+
+The idea behind Kalman Filter is simple and intuitive:
+\begin{itemize}
+    \item we make a simulated replica (\emph{digital twin}) of the system (without noises $v_1$ and $v_2$ which are not measurable)
+    \item we compare the true measured output with the estimated/predicted output $\hat{y}(t|t-1)$
+    \item we make corrections on the \gls{kf} main equation, proportional (with gain $K(t)$) to the output error $e(t)$ in order to keep \gls{kf} as close as possible to the system 
+    \item we extract the state estimation $\hat{x}(t|t-1)$ from the digital twin
+\end{itemize}
+
+\begin{rem}
+    Kalman Filter is a feedback system.
+    Feedback here is not used for control, but for estimation.
+\end{rem}
+
+This general structure has been known since the 1930s (before the development of the Kalman Filter) and was called \emph{state observer}.
+Fundamental contribution of Kalman was to find the \textbf{optimal gain $K(t)$}.
+$K(t)$ is not a simple scalar gain but is a (maybe very large) $n\times p$ matrix.
+
+The selection of gain matrix $K(t)$ is very critical:
+\begin{itemize}
+    \item if $K(t)$ is \emph{too small}: the estimation is not optimal because we are \emph{under exploiting} the information in $y(t)$
+    \item if $K(t)$ is \emph{too big}: risk of over-exploiting $y(t)$ and we can get noise amplification, even risk of instability
+\end{itemize}
+
+Design of a Kalman Filter does not require a \emph{training dataset}, but a complete model of the system:
+\begin{itemize}
+    \item $F$, $G$, $H$ matrices: usually obtained with a white-box physical modelling of the system
+    \item $V_1$, $V_2$ and $V_{12}$: $V_2$ is easily built from sensor specifications, while $V_1$ is much more difficult to be designed (it's the most critical design parameter of \gls{kf}). For what concerns $V_{12}$, we recall that in practice $V_{12} = 0$. 
+\end{itemize}
+
+ 
\ No newline at end of file
diff --git a/lectures/2022_04_27.tex b/lectures/2022_04_27.tex
new file mode 100644
index 0000000..443f476
--- /dev/null
+++ b/lectures/2022_04_27.tex
@@ -0,0 +1,350 @@
+%!TEX root = ../main.tex
+
+\externaldocument{2022_04_21}
+\externaldocument{2022_05_02}
+
+\section{Extensions of the \gls{kf} for General System}\label{sec:KF-extensions}
+
+\subsection{Exogenous input}
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+        \node [block] (z1) at (1,5) {$z^{-1}$};
+        \node [block] (F1) at (1,4) {$F$};
+        \node [block] (K) at (1,2) {$K(t)$};
+        \node [block] (z2) at (1,1) {$z^{-1}$};
+        \node [block] (F2) at (1,0) {$F$};
+
+        \node [block] (H1) at (4,5) {$H$};
+        \node [block] (H2) at (4,1) {$H$};
+
+        \node [block] (G1) at (-3,5) {$G$};
+        \node [block] (G2) at (-3,1) {$G$};
+
+        \node[left] (u) at (-4,5) {$u(t)$};
+
+        \node [sum] (sum1) at (6,5) {};
+        \node [sum] (sum2) at (7,2) {};
+        \node [sum] (sum3) at (-1.5,5) {};
+        \node [sum] (sum4) at (-1.5,1) {};
+
+        \node (v1) at (-1.5,6) {$v_1(t)$};
+        \node (v2) at (6,6) {$v_2(t)$};
+        \node[right] (y) at (8,5) {$y(t)$};
+        \node[right] (yhat) at (8,1) {$\hat{y}(t|t-1)$};
+        \node[right] (xhat) at (8,0) {$\hat{x}(t|t-1)$};
+
+        \node at (-1,2.3) {\emph{feedback}};
+
+        \draw[->,red,line width=0.5mm] (u) -- (G1);
+        \draw[->,red,line width=0.5mm] (G1) -- (sum3);
+        \draw[->,red,line width=0.5mm] (-3.8,5) |- (G2);
+        \draw[->,red,line width=0.5mm] (G2) -- (sum4);
+        \draw[->] (v1) -- (sum3);
+        \draw[->] (sum3) -- (z1) node[pos=0.5] {$x(t+1)$};
+        \draw[->] (F1) -| (sum3);
+        \draw[->] (z1) -- (H1) node[pos=0.5] {$x(t)$};
+        \draw[->] (H1) -- (sum1);
+        \draw[->] (v2) -- (sum1);
+        \draw[->] (2.5,5) |- (F1);
+        \draw[->] (K) -| (sum4);
+        \draw[->] (sum4) -- (z2) node[pos=0.5] {$\hat{x}(t+1|t)$};
+        \draw[->] (7,5) -- (sum2) node[pos=0.8] {$+$};
+        \draw[->] (sum1) -- (y);
+        \draw[<-] (K) -- (sum2) node[pos=0.5,black] {$e(t)$};
+        \draw[->] (z2) -- (H2) node[pos=0.5] {$\hat{x}(t|t-1)$};
+        \draw[->] (F2) -| (sum4);
+        \draw[->] (2.5,1) |- (F2);
+        \draw[->] (H2) -- (yhat);
+        \draw[->] (7,1) -- (sum2) node[pos=0.8] {$-$};
+        \draw[->] (F2) -- (xhat);
+
+        \draw[dashed, blue] ($(G2) + (-1,-2)$) rectangle ($(sum2) + (1,1)$)
+            node[left=1cm of G2] {$\mathcal{KF}$:};
+
+    \end{tikzpicture}
+\end{figure}
+
+Notice that $K(t)$ remains the same because $P(t)$ is the covariance of the prediction error on $x(t)$ which remains the same because $Gu(t)$ doesn't introduce any additional noise or uncertainties to the system.
+That's because $Gu(t)$ is a totally known (deterministic) signal.
+
+\subsection{Multi-step Prediction}
+
+Assuming that $\hat{x}(t+1|t)$ is known from the basic solution, we can simply obtain a multi-step prediction as:
+\begin{align*}
+    \hat{x}(t+2|t) &= F \hat{x}(t+1|t) \\
+    \hat{x}(t+3|t) &= F \hat{x}(t+2|t) = F^2\hat{x}(t+1|t) \\
+    \vdots \\ 
+    \begin{cases}
+        \hat{x}(t+k|t) = F^{k-1} \hat{x}(t+1|t) \\
+        \hat{y}(t+k|t) = H\hat{x}(t+k|t)
+    \end{cases}
+\end{align*}   
+
+
+\subsection{Filter ($\hat{x}(t|t)$)}
+
+\[
+    \hat{x}(t+1|t) = F\hat{x}(t|t) \quad \implies \quad \hat{x}(t|t) = F^{-1}\hat{x}(t+1|t)
+\]
+This formula can be used only if $F$ is invertible.
+If $F$ is not invertible, the filter can be obtained with a specific \emph{filter} formulation of the \gls{kf}.
+
+\begin{defn}[Kalman Filter in Filter Form] \label{KF-Filter_Form_sol}
+    Reformulation of the equations described in \ref{subsec:KF-basic_sol}.\\
+    State equation (\ref{eq:KF-state}) and Gain equation (\ref{eq:KF-gain}) become
+    \begin{align*}
+        \hat{x}(t|t) &= F\hat{x}(t-1|t-1) + Gu(t-1) + K_0(t)e(t) \\
+        K_0(t) &= \left(P(t)H\transpose\right) \left(HP(t)H\transpose+V_2\right)^{-1} \\
+    \end{align*}
+    while the \gls{dre} equation (\ref{eq:KF-DRE}) remains unchanged.\\
+    
+    The initial condition for the new State equation is 
+    \begin{align*}
+        \hat{x}(1|1) &= X_0 
+    \end{align*}
+\end{defn}
+
+\begin{rem}
+    These equations are valid under the (legit) assumption $V_{12} = 0$.
+\end{rem}
+
+\begin{rem}
+    Gain of \gls{kf} in prediction form (eq. \ref{eq:KF-gain} assuming $V_{12}=0$):
+    \[
+        K(t) = \left( FP(t)H\transpose \right) \left( HP(t)H\transpose+V_2 \right)^{-1}
+    \]
+
+    Gain of \gls{kf} in filter form:
+    \[
+        K_0(t) = \left(\textcolor{gray}{\cancel{F}} P(t)H\transpose \right) \left( HP(t)H\transpose+V_2 \right)^{-1}
+    \]
+
+    Therefore, the only difference is the elimination of $F$.
+\end{rem}
+
+
+\subsection{Time-varying systems}\label{subsec:time-varying_extension}
+In case of time-varying systems we have to perform the following substitutions:
+\begin{align*}
+    F \mapsto F(t) \\
+    G \mapsto G(t) \\
+    H \mapsto H(t)
+\end{align*}
+
+Therefore
+\[
+    \Sc: \begin{cases}
+        x(t+1) = F(t)x(t) + G(t)u(t) + v_1(t) \\
+        y(t) = H(t)x(t) + v_2(t)
+    \end{cases}
+\]
+
+\begin{defn} [Linear Time Varying (LTV) system]
+    The system $\Sc$ is a \emph{Linear Time Varying (LTV)} system, since its parameters ($F,G,H$) depend on the time instant $t$.
+\end{defn}
+
+Kalman Filter equations remain of the same form as the ones described in \ref{subsec:KF-basic_sol}: we just need to replace the parameter matrices ($F,G,H$) with the time-varying ones ($F(t),G(t),H(t)$).
+
+\subsection{Non-Linear Systems}
+
+This extension is much more complicated. We will see the Extended Kalman Filter (EKF) in section \ref{subsec:KF_non-lin_ext}.
+
+\section{Asymptotic Solution of Kalman Filter}
+
+\begin{rem} 
+    Even when the system $\Sc$ is an LTI, the Kalman Filter is not itself an LTI system: it is an LTV system, since it depends on the gain $K(t)$ which is time-varying.
+\end{rem}
+
+
+The fact that \gls{kf} is an LTV system is the source of 2 problems:
+\begin{itemize}
+    \item Checking the asymptotic stability of the \gls{kf} algorithm is very difficult, since the stability check of an LTV system is not as simple as the stability check of an LTI system.
+    \item Computational problem: $K(t)$ must be computed at each sampling time (e.g. every 5ms), including the inversion of $HP(t)H\transpose+V_2$ ($p\times p$ matrix) and the computation of $P(t)$ using the \gls{dre}.
+\end{itemize}
+
+\begin{rem}[Asymptotic Stability of a system]
+
+    If the system is: 
+    \begin{itemize}
+        \item LTI: $x(t+1) = Fx(t) + Gu(t)$ \\ 
+        \qquad the stability check considers only the eigenvalues of $F$: the system is asymptotically stable if and only if all of them are strictly inside the unit circle.
+
+        \item LTV: $x(t+1) = F(t)x(t) + G(t)u(t)$ \\
+        \qquad even if all the eigenvalues of $F(t)$ are strictly inside the unit circle at any time, the system is not guaranteed to be asymptotically stable.
+        In practice it is, if the time-variations are \emph{slow} (e.g. aging).
+    \end{itemize}
+
+\end{rem}
+
+Because of those problems in real/practical applications the asymptotic version of \gls{kf} is preferred.
+
+\paragraph{Basic idea}
+Since the dependency on $t$ of $K(t)$ derives from the presence of $P(t)$, if $P(t)$ converges to a constant value $\bar{P}$ (steady-state value of $P(t)$), then also $K(t)$ will converge to $\bar{K}$ (steady-state value of $K(t)$).
+\\
+Using $\bar{K}$ the \gls{kf} becomes an LTI system.
+
+Let's analyze the asymptotic stability of the State equation (eq. \ref{eq:KF-state}) of the asymptotic \gls{kf} when $\bar{K}$ is used (assuming it exists).
+
+\begin{align*}
+    \hat{x}(t+1|t) &= F\hat{x}(t|t-1) + Gu(t) + \bar{K}e(t) \\
+    &= F\hat{x}(t|t-1) + Gu(t) + \bar{K}(y(t) - \hat{y}(t|t-1)) \\
+    &= F\hat{x}(t|t-1) + Gu(t) + \bar{K}(y(t) - H\hat{x}(t|t-1)) \\
+    &= \underbrace{(F - \bar{K}H)}_{\text{new state matrix}} \hat{x}(t|t-1) + Gu(t) + \bar{K}y(t)
+\end{align*}
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+        \node[block ] (K) at (1,1) {$\bar{K}$};
+        \node[block ] (G) at (1,2) {$G$};
+        \node[sum] (sum) at (3,1) {};
+        \node[block ] (z) at (6,1) {$z^{-1}$};
+        \node[block ] (fb) at (6,0) {$F-\bar{K}H$};
+
+        \node[left] (y) at (0,1) {$y(t)$};
+        \node[left] (u) at (0,2) {$u(t)$};
+
+        \draw[->] (y) -- (K);
+        \draw[->] (u) -- (G);
+        \draw[->] (K) -- (sum);
+        \draw[->] (sum) -- (z) node[pos=0.5] {$\hat{x}(t+1|t)$};
+        \draw[->] (z) -- (9,1) node[pos=0.5] {$\hat{x}(t|t-1)$};
+        \draw[->] (fb) -| (sum);
+        \draw[->] (G) -| (sum);
+        \draw[->] (7.5,1) |- (fb);
+    \end{tikzpicture}
+    \caption*{Asymptotic Kalman Filter with Exogenous input}
+\end{figure}
+
+\paragraph{Condition for Asymptotic Stability of \gls{kf}}
+
+ If $\bar{K}$ exists, the \gls{kf} is asymptotically stable if and only if all the eigenvalues of $F-\bar{K}H$ are strictly inside the unit circle.
+
+\begin{rem}
+    The stability of the system $\Sc$ is related to matrix $F$, whereas the stability of \gls{kf} is related to matrix $F-\bar{K}H$.
+
+    Therefore, \gls{kf} can be asymptotically stable even if the system is unstable.
+\end{rem}
+
+\paragraph{Existence of $\bar{K}$}
+Starting from equation \ref{eq:KF-gain}, if $P(t)$ converges to a steady-state value $\bar{P}$, then $K(t)$ becomes
+
+\[
+    \bar{K} = \left(F\bar{P}H\transpose + V_{12}\right)\left(H\bar{P}H\transpose+V_2\right)^{-1}
+\]
+
+Thus, $\bar{K}$ exists if $\bar{P}$ exists.\\
+
+In order to check that, we need to check the convergence properties of the \gls{dre}.
+
+\begin{rem}[Stability of a dynamical autonomous system]
+    How to find the equilibrium points of a dynamical autonomous system?
+    \begin{center}
+        \begin{tabular}{c|c}
+            \textbf{Continuous time} & \textbf{Discrete time} \\
+            \hline\hline
+            $\dot{x} = f(x(t))$ & $x(t+1) = f(x(t))$ \\
+            \hline \\
+            equilibrium when $\dot{x} = 0$ & equilibrium when $x(t+1) = x(t)$ \\
+            $\downarrow$ & $\downarrow$ \\
+            $f(\bar{x}) = 0$ & $f(\bar{x}) = \bar{x}$ \\
+        \end{tabular}
+    \end{center}
+\end{rem}
+\gls{dre} is an autonomous discrete time system, thus we impose $\bar{P} = f(\bar{P})$, where $f(\bar{P})$ is the \gls{dre} (eq. \ref{eq:KF-DRE}) evaluated in $\bar{P}$:
+
+\begin{flalign}\label{eq:KF-ARE}
+    \nameeq{\bar{P} = \left( F\bar{P}F\transpose + V_1 \right)-\left(F\bar{P}H\transpose + V_{12}\right)\left(H\bar{P}H\transpose + V_2\right)^{-1}\left(F\bar{P}H\transpose+V_{12}\right)\transpose}{\acrshort{are}}
+\end{flalign}
+
+\begin{defn}[Algebraic Riccati Equation (ARE)]
+    The equation \ref{eq:KF-ARE} is a non-linear, matrix, static algebraic equation, known as \emph{\acrfull{are}}.
+\end{defn}
+
+If a steady-state solution $\bar{P}$ of the \gls{dre} (eq. \ref{eq:KF-DRE}) does exist, it must be a solution of the ARE (eq. \ref{eq:KF-ARE}).
+There remain 3 questions:
+\begin{enumerate}
+    \item \textbf{Existence}: does \gls{are} have a semi-definite positive solution $\bar{P}$?
+    \item \textbf{Convergence}: if it exists, does the \gls{dre} converge to $\bar{P}$?
+    \item \textbf{Stability}: is the corresponding $\bar{K}$ such that the \gls{kf} is asymptotically stable?
+\end{enumerate}
+
+
+To answer those questions we need two fundamental theorems (\gls{kf} asymptotic theorems).
+
+\subsection{Asymptotic Kalman Filter Theorems}
+The two Asymptotic Kalman Filter Theorems that we are going to introduce provide \emph{sufficient} conditions only.
+
+\begin{thm}[First Asymptotic \gls{kf} Theorem]\label{th:1KF_as}
+    Assumptions: $V_{12} = 0$ and the system is asymptotically stable (i.e. all eigenvalues of $F$ are strictly inside the unit circle).
+    Then:
+    \begin{enumerate}
+        \item \gls{are} has one and only one semi-definite positive solution: $\bar{P} \ge 0$.
+        \item \gls{dre} converges to $\bar{P}$, $\forall P_0 \ge 0$ ($P_0$: initial semi-definite positive condition).
+        \item The corresponding $\bar{K}$ is s.t. the \gls{kf} is asymptotically stable (i.e. all the eigenvalues of $F-\bar{K}H$ have absolute value less than $1$).
+    \end{enumerate}
+\end{thm}
+
+\begin{rem}[Observability and Controllability]
+    Recall on Observability and Controllability needed for the introduction of the Second Asymptotic \gls{kf} Theorem (\ref{th:2KF_as}). 
+
+    \paragraph{Observability of the state through the output} 
+
+    The pair $(F, H)$ is observable if and only if
+    \[
+        O = \begin{bmatrix}
+            H \\
+            HF \\
+            \vdots \\
+            HF^{n-1}
+        \end{bmatrix}
+        \qquad
+        \text{is full rank}
+    \]
+
+    \paragraph{Controllability from the noise} 
+
+    We are interested in controllability from $v_1(t)$ (and not from $u(t)$).
+
+    \begin{align*}
+        x(t+1) = Fx(t) + \cancel{Gu(t)} + v_1(t) \qquad v_1(t) \sim WN(0, V_1)
+    \end{align*}
+
+    % \begin{align*}
+    %     x(t+1) = Fx(t) + v_1(t)
+    % \end{align*}
+
+    It's always possible to factorize $V_1 = \Gamma\cdot\Gamma^T$ rewriting
+    \[
+        x(t+1) = Fx(t) + \Gamma\omega(t) \qquad \omega(t) \sim WN(0, I)
+    \]
+
+    We can say that the state $x$ is controllable/reachable from the input noise $v_1(t)$ if and only if:
+    \[
+        R = \begin{bmatrix}
+            \Gamma & F\Gamma & \cdots & F^{n-1}\Gamma
+        \end{bmatrix}
+        \qquad
+        \text{is full rank}
+    \]
+\end{rem}
+
+
+\begin{thm}[Second Asymptotic \gls{kf} Theorem]\label{th:2KF_as}
+    Assumptions: $V_{12} = 0$, $(F, H)$ is observable and $(F, \Gamma)$ is controllable.
+    Then:
+    \begin{enumerate}
+        \item \gls{are} has one and only one definite positive solution $\bar{P} > 0$.
+        \item \gls{dre} converges to $\bar{P}$, $\forall P_0 \ge 0$ ($P_0$: initial semi-definite positive condition).
+        \item The corresponding $\bar{K}$ is such that the \gls{kf} is asymptotically stable (i.e. all the eigenvalues of $F-\bar{K}H$ have absolute value less than $1$).
+    \end{enumerate}
+\end{thm}
+
+\begin{rem}
+    The difference between theorems \ref{th:1KF_as} and \ref{th:2KF_as} is that the first one ensures that $\bar{P} \ge 0$ while the second one ensures that $\bar{P} > 0$. 
+\end{rem}
+
+These two theorems are very useful in practice because we can fully avoid the (very difficult) direct convergence analysis of \gls{dre}.
+
+
diff --git a/lectures/2022_05_02.tex b/lectures/2022_05_02.tex
new file mode 100644
index 0000000..ebe9b4b
--- /dev/null
+++ b/lectures/2022_05_02.tex
@@ -0,0 +1,773 @@
+%!TEX root = ../main.tex
+
+\externaldocument{2022_04_27}
+
+\begin{exa}
+    \[
+        S: \begin{cases}
+            x(t+1) = \frac{1}{2}x(t) + v_1(t) \quad& v_1 \sim WN(0, \frac{19}{20}) \\
+            y(t) = 2x(t) + v_2(t) \quad& v_2 \sim WN(0, 1)
+        \end{cases}
+        \qquad
+        v_1 \perp v_2
+    \]
+
+    \paragraph{Question} Find (if exists) the steady state (asymp.) \gls{kf} $\hat{x}(t+1|t)$ and $\hat{x}(t|t)$.
+
+    \[
+        n = 1 \qquad F = \frac{1}{2} \qquad G = 0 \qquad H = 2 \qquad V_1 = \frac{19}{20} \qquad V_2 = 1 \qquad V_{12} = 0
+    \]
+
+    Since $V_{12} = 0$ we can try to use the asymptotic theorems.
+
+    \subparagraph{First step} Compute the \gls{dre}
+    \begin{align*}
+        P(t+1) &= \left( FP(t)F^T+V_1 \right) - \left( FP(t)H^T+V_{12} \right) \left( HP(t)H^T+V_2 \right)^{-1}\left( FP(t)H^T+V_{12} \right)^T \\
+        &= \frac{1}{4}P(t) + \frac{19}{20} - \frac{ \left(\frac{1}{\cancel{2}}P(t)\cancel{2}\right)^2 }{4P(t)+1} \\
+        &= \frac{\cancel{P(t)^2} + \frac{1}{4}P(t) + \frac{19}{5} P(t) + \frac{19}{20} - \cancel{P(t)^2} }{4P(t) + 1}
+    \end{align*}
+
+    \textbf{Note} The second order terms must cancel out.
+
+    \[
+        P(t+1) = \frac{81P(t) + 19}{80P(t)+20}
+    \]
+
+    \subparagraph{Second step} Compute and solve the \gls{are}
+
+    \[
+        \overline{P} = \frac{81\overline{P} + 19}{80\overline{P}+20} \quad \implies \quad 80\overline{P}^2 + 20\overline{P}-81\overline{P}-19 = 0
+    \]
+    \[
+        \overline{P}_1 = 1 \qquad \cancel{\overline{P}_2 = -\frac{19}{80}} < 0
+    \]
+
+
+    $\overline{P}=1$ is the only definite positive solution of the \gls{are}.
+
+    \paragraph{Question} Does the \gls{dre} converge to $\overline{P}=1$, $\forall P_0 \ge 0$?
+
+    There are 2 methods for addressing this question:
+
+    \begin{itemize}
+        \item Direct analysis of \gls{dre}
+        \item Using asymptotic theorems
+    \end{itemize}
+
+    \subparagraph{First method} Direct analysis of \gls{dre}
+
+    \begin{align*}
+        P(t+1) = f(P(t)) \qquad \text{we need to plot $f(\cdot)$ in the $P(t)$ -- $P(t+1)$ plane}
+    \end{align*}
+
+    \[
+        P(t+1) = \frac{81P(t) + 19}{80P(t)+20}
+        \qquad
+        \begin{cases}
+            \text{vertical asymptote:} & P(t) = -\frac{20}{80} = -\frac{1}{4} \\
+            \text{horizontal asymptote:} & P(t+1) = \frac{81}{80}
+        \end{cases}
+    \]
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \fill [pattern=flexible hatch, pattern color=gray!40] (-2.5,-2) -- (5.5,-2) -- (5.5,0) -- (0,0) -- (0,4.9) -- (-2.5,4.9) -- (-2.5,-2);
+            \draw[->] (-2.5,0) -- (6,0) node[below] {$P(t)$};
+            \draw[->] (0,-2) -- (0,5) node[above] {$P(t+1)$};
+            \draw[domain=-1.25:5,smooth,variable=\x,red] plot ({\x},{5*(\x+1)/(\x+2)});
+
+            \draw[dashed] (-1.5,-2) -- (-1.5,4.9);
+            \node[above left] at (-1.5,0) {$-\frac{1}{4}$};
+            \draw[dashed] (-2.5,4.5) -- (5.5,4.5);
+            \node[below left] at (0,4.5) {$\frac{81}{80}$};
+            \node[above left] at (0,2.5) {$\frac{19}{20}$};
+
+            \draw (-1.8,-1.8) -- (5,5) node[left] {$P(t+1) = P(t)$};
+
+            \draw[->,dotted] (1,0) -- (1,3.3);
+            \draw[->,dotted] (1,3.3) -- (3.3,3.3);
+            \draw[->,dotted] (3.3,3.3) -- (3.3,4.0);
+            \draw[->,dotted] (3.3,4.0) -- (4.0,4.0);
+            \draw[->,dotted] (4.0,4.0) -- (4.0,4.2);
+            \draw[mark=*,red] plot coordinates {(4.2,4.2)} node[black,below] {$\overline{P}_1$};
+            \draw[mark=*,red] plot coordinates {(-1.2,-1.2)} node[black,below right] {$\overline{P}_2$};
+            \draw[mark=*,blue] plot coordinates {(1,3.35)} node[black,above] {\tiny$P(2)$};
+            \draw[mark=*,blue] plot coordinates {(3.3,4.05)} node[black,above] {\tiny$P(3)$};
+            \draw[mark=*,blue] plot coordinates {(4,4.2)};
+            \node[below] at (1,0) {\tiny$P(1) = P_0 \ge 0$};
+
+            \draw[->,red] (1.3,3.9) arc[radius=5,start angle=115,end angle=95];
+            \node[red,rotate=15] at (2,4.3) {\tiny converges};
+        \end{tikzpicture}
+    \end{figure}
+
+    By direct analysis/inspection of \gls{dre} dynamics we can conclude that $\forall P_0\ge 0$, \gls{dre} always converges to $\overline{P}_1=1$.
+
+    If $n=1$ the direct inspection is feasible, but it's very difficult for $n\ge2$.
+
+    \subparagraph{Second method} Use theorems
+
+    \[
+        V_{12} = 0 \qquad F = \frac{1}{2} \text{ ($S$ is stable)} \quad \implies \quad \text{First theorem is fulfilled}
+    \]
+
+    The observability matrix of $\{F, H\}$ is $O=\begin{bmatrix} 2 \end{bmatrix}$ with full $\rank O = 1$, the system is fully observable.
+
+    Controllability from noise $v_1(t)$
+    \[
+        V_1 = \frac{19}{20} \qquad \Gamma=\sqrt{\frac{19}{20}}
+    \]
+    The controllability matrix of $\{F, \Gamma\}$ is $R = \begin{bmatrix} \sqrt{\frac{19}{20}} \end{bmatrix}$ with full $\rank R = 1$, the system is fully controllable from noise.
+
+    Both theorems are fulfilled, so \gls{are} has one and only one solution $\overline{P} > 0$, \gls{dre} converges to $\overline{P}$, $\forall P_0\ge0$ and $\overline{K}$ makes the \gls{kf} asymptotically stable.
+
+    \subparagraph{Third step} Compute $\overline{K}$
+    \[
+        \overline{K} = \left(F\overline{P}H^T + V_{12}\right)\left(H\overline{P}H^T+V_2\right)^{-1} = \left(\frac{1}{2}\cdot 1 \cdot 2 + 0\right) \left(2\cdot 1 \cdot 2 + 1\right)^{-1} = \frac{1}{5}
+    \]
+
+    Double-check the asymptotic stability of the \gls{kf}:
+    \[
+        F - \overline{K}H = \frac{1}{2} - \frac{1}{5}\cdot 2 = \frac{5-4}{10} = \frac{1}{10}
+    \]
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \node [block] (z1) at (1,4) {$z^{-1}$};
+            \node [block] (F1) at (1,3) {$\frac{1}{2}$};
+            \node [block] (K) at (1,2) {$\frac{1}{5}$};
+            \node [block] (z2) at (1,1) {$z^{-1}$};
+            \node [block] (F2) at (1,0) {$\frac{1}{2}$};
+
+            \node [block] (H1) at (4,4) {$2$};
+            \node [block] (H2) at (4,1) {$2$};
+
+            \node [sum] (sum1) at (6,4) {};
+            \node [sum] (sum2) at (7,2) {};
+            \node [sum] (sum3) at (-1.5,4) {};
+            \node [sum] (sum4) at (-1.5,1) {};
+
+            \node (v1) at (-1.5,5) {$v_1(t)$};
+            \node (v2) at (6,5) {$v_2(t)$};
+            \node[right] (y) at (8,4) {$y(t)$};
+            \node[right] (yhat) at (8,1) {$\hat{y}(t|t-1)$};
+            \node[right] (xhat) at (8,0) {$\hat{x}(t|t-1)$};
+
+            \draw[->] (v1) -- (sum3);
+            \draw[->] (sum3) -- (z1) node[pos=0.5] {$x(t+1)$};
+            \draw[->] (F1) -| (sum3);
+            \draw[->] (z1) -- (H1) node[pos=0.5] {$x(t)$};
+            \draw[->] (H1) -- (sum1);
+            \draw[->] (v2) -- (sum1);
+            \draw[->] (2.5,4) |- (F1);
+            \draw[->] (K) -| (sum4);
+            \draw[->] (sum4) -- (z2) node[pos=0.5] {$\hat{x}(t+1|t)$};
+            \draw[->] (7,4) -- (sum2) node[pos=0.8] {$+$};
+            \draw[->] (sum1) -- (y);
+            \draw[<-] (K) -- (sum2) node[pos=0.5,black] {$e(t)$};
+            \draw[->] (z2) -- (H2) node[pos=0.5] {$\hat{x}(t|t-1)$};
+            \draw[->] (F2) -| (sum4);
+            \draw[->] (2.5,1) |- (F2);
+            \draw[->] (H2) -- (yhat);
+            \draw[->] (7,1) -- (sum2) node[pos=0.8] {$-$};
+            \draw[->] (F2) -- (xhat);
+        \end{tikzpicture}
+    \end{figure}
+
+    \paragraph{Question} Find \gls{tf} from $y(t)$ to $\hat{x}(t|t-1)$
+
+    \textbf{Recall} \gls{tf} from block schemes of feedback systems.
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \node[block] (G1) at (0,1) {$G_1(z)$};
+            \node[block] (G2) at (0,0) {$G_2(z)$};
+            \node[sum] (sum) at (-1.5,1) {};
+            \node (u) at (-3,1) {$u(t)$};
+            \node (y) at (2.5,1) {$y(t)$};
+
+            \draw[->] (u) -- (sum) node[pos=0.8] {$+$};
+            \draw[->] (sum) -- (G1);
+            \draw[->] (G1) -- (y);
+            \draw[->] (G2) -| (sum) node[pos=0.8] {$\mp$};
+            \draw[->] (1.5,1) |- (G2);
+        \end{tikzpicture}
+    \end{figure}
+
+    \[
+        y(t) = G_1(z) \left(u(t) \mp G_2(z)y(t)\right) \quad \implies \quad y(t) = \frac{G_1(z)}{1 \pm G_1(z)G_2(z)}u(t)
+    \]
+
+    The \gls{kf} is composed of 2 nested loops.
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \node [block] (z1) at (1,4) {$z^{-1}$};
+            \node [block] (F1) at (1,3) {$\frac{1}{2}$};
+            \node [block] (K) at (1,2) {$\frac{1}{5}$};
+            \node [block] (z2) at (1,1) {$z^{-1}$};
+            \node [block] (F2) at (1,0) {$\frac{1}{2}$};
+
+            \node [block] (H1) at (4,4) {$2$};
+            \node [block] (H2) at (4,1) {$2$};
+
+            \node [sum] (sum1) at (6,4) {};
+            \node [sum] (sum2) at (7,2) {};
+            \node [sum] (sum3) at (-1.5,4) {};
+            \node [sum] (sum4) at (-1.5,1) {};
+
+            \node (v1) at (-1.5,5) {$v_1(t)$};
+            \node (v2) at (6,5) {$v_2(t)$};
+            \node[right] (y) at (8,4) {$y(t)$};
+            \node[right] (yhat) at (8,1) {$\hat{y}(t|t-1)$};
+            \node[right] (xhat) at (8,0) {$\hat{x}(t|t-1)$};
+
+            \draw[->] (v1) -- (sum3);
+            \draw[->] (sum3) -- (z1) node[pos=0.5] {$x(t+1)$};
+            \draw[->] (F1) -| (sum3);
+            \draw[->] (z1) -- (H1) node[pos=0.5] {$x(t)$};
+            \draw[->] (H1) -- (sum1);
+            \draw[->] (v2) -- (sum1);
+            \draw[->] (2.5,4) |- (F1);
+            \draw[->,red,line width=0.5mm] (K) -| (sum4);
+            \draw[->,red,line width=0.5mm] (sum4) -- (z2) node[pos=0.5,black] {$\hat{x}(t+1|t)$};
+            \draw[->] (7,4) -- (sum2) node[pos=0.8] {$+$};
+            \draw[->] (sum1) -- (y);
+            \draw[<-] (K) -- (sum2) node[pos=0.5,black] {$e(t)$};
+            \draw[->,red,line width=0.5mm] (z2) -- (H2) node[pos=0.5,black] {$\hat{x}(t|t-1)$};
+            \draw[->,red,line width=0.5mm] (F2) -| (sum4);
+            \draw[->,red,line width=0.5mm] (2.5,1) |- (F2);
+            \draw[->] (H2) -- (yhat);
+            \draw[->] (7,1) -- (sum2) node[pos=0.8] {$-$};
+            \draw[->] (2.5,0) -- (xhat);
+
+            \node[red] at (-0.5,0.5) {$\frac{z^{-1}}{1-\frac{1}{2}z^{-1}}$};
+        \end{tikzpicture}
+    \end{figure}
+
+    Predictor of state:
+    \[
+        \hat{x}(t|t-1) = \frac{ \frac{1}{5} \frac{z^{-1}}{1-\frac{1}{2}z^{-1}} }{ 1 + \frac{1}{5} \frac{z^{-1}}{1-\frac{1}{2}z^{-1}} 2 }y(t) = \frac{ \frac{1}{5}z^{-1} }{1-\frac{1}{10}z^{-1}}y(t)
+    \]
+
+    Predictor of output:
+    \[
+        \hat{y}(t|t-1) = H\hat{x}(t|t-1) = \frac{2}{5} \frac{ 1 }{1-\frac{1}{10}z^{-1}}y(t-1)
+    \]
+
+    Filter of state:
+    \[
+        \hat{x}(t|t) = F^{-1}\hat{x}(t+1|t) = \frac{2}{5} \frac{ 1 }{1-\frac{1}{10}z^{-1}}y(t)
+    \]
+\end{exa}
+
+
+\begin{rem}[White noise]
+    In the formulas of Kalman Filter there is a requirement that $v_1(t)$ and $v_2(t)$ must be white noises.
+    In many practical applications this assumption can be too demanding.
+
+    We need a workaround to deal with practical applications where this assumption is not valid.
+    The workaround is a simple trick called \emph{state extension} and it consists in extending the model of $\Sc$ to incorporate the \emph{noise dynamics}.
+
+    Let's see how it works with a numerical example.
+
+    \paragraph{Example}
+
+    Given a system $\Sc$ of order $n=1$
+    \[
+        \Sc:
+        \begin{cases}
+            x(t+1) = ax(t) + \eta(t) \qquad & \eta(t) \text{ is not a white noise}\\
+            y(t) = bx(t) + v_2(t) \qquad & v_2(t) \sim \WN(0,1)
+        \end{cases}
+    \]
+
+    and a model of $\eta(t)$ (\acrshort{ar}(1) stochastic model)
+    \[
+        \eta(t) = \frac{1}{1-cz^{-1}}e(t) \qquad e(t) \sim \WN(0,1) \qquad e \perp v_2
+    \]
+   
+    We cannot apply \gls{kf} formula to this system, since $\eta(t)$ is not a \gls{wn}, but we can proceed as follows
+    \begin{align*}
+        \eta(t) = c\eta(t-1) + e(t) \xRightarrow{z} \eta(t+1) &= c\eta(t) + e(t+1) \\
+        \eta(t+1) &= c\eta(t) + v(t)
+    \end{align*}
+    where $v(t) = e(t+1)$, $v \sim \WN(0,1)$ and $v \perp v_2$.
+
+    \paragraph{Trick} Extension of the state vector.
+
+    \begin{align*}
+        x(t) \mapsto x_1(t) \\
+        \eta(t) \mapsto x_2(t)
+    \end{align*}
+
+    Now $n=2$ and $\Sc$ becomes $\Sc'$
+
+    \[
+        \Sc':
+        \begin{cases}
+            x_1(t+1) = ax_1(t) + x_2(t) \\
+            x_2(t+1) = cx_2(t) + v(t) \\
+            y(t) = bx_1(t) + v_2(t)
+        \end{cases}
+    \]
+
+    \[
+        F = \begin{bmatrix}
+            a & 1 \\
+            0 & c
+        \end{bmatrix}
+        \quad H = \begin{bmatrix}
+            b & 0
+        \end{bmatrix}
+        \quad v_1 = \begin{bmatrix}
+            0 \\ v(t)
+        \end{bmatrix}
+        \quad V_1 = \begin{bmatrix}
+            0 & 0 \\
+            0 & 1
+        \end{bmatrix}
+        \quad v_2 \sim WN(0,1)
+        \quad V_{12} = 0
+    \]
+
+    Now we can apply \gls{kf} solution equations (\ref{subsec:KF-basic_sol}) to this system.
+\end{rem}
+
+\subsection{Extension to Non-Linear systems}\label{subsec:KF_non-lin_ext}
+Coming back to extensions of the \gls{kf} for the basic system (section \ref{sec:KF-extensions}), we consider a system with non-linear dynamics:
+\[
+    S: \begin{cases}
+        x(t+1) = f(x(t), u(t)) + v_1(t) \\
+        y(t) = h(x(t)) + v_2(t)
+    \end{cases}
+\]
+
+where $f$ and $h$ are non-linear functions of $x(t)$ and $u(t)$ (smoothness class $C^1$ or higher), for example
+
+\vspace{-15pt}
+
+\[
+    f(x(t), u(t)) = \frac{1}{2} x^5(t) + u^3(t), \qquad h(x(t)) = e^{x(t)}
+\]
+
+How can we design a Kalman Filter in this case? We can follow the general idea of the \emph{state observer}. 
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+        \node [block] (z1) at (1,4) {$z^{-1}$};
+        \node [block, double, double distance=0.5mm] (F1) at (-1.5,3) {$f$};
+        \node [block, red] (K) at (1,2) {$?$};
+        \node [block] (z2) at (1,1) {$z^{-1}$};
+        \node [block, double, double distance=0.5mm] (F2) at (-1.5,0) {$f$};
+
+        \node [block, double, double distance=0.5mm] (H1) at (4,4) {$h$};
+        \node [block, double, double distance=0.5mm] (H2) at (4,1) {$h$};
+
+        \node [sum] (sum1) at (6,4) {};
+        \node [sum] (sum2) at (7,2) {};
+        \node [sum] (sum3) at (-1.5,4) {};
+        \node [sum] (sum4) at (-1.5,1) {};
+
+        \node (v1) at (-1.5,5) {$v_1(t)$};
+        \node (v2) at (6,5) {$v_2(t)$};
+        \node[right] (y) at (8,4) {$y(t)$};
+        \node[right] (yhat) at (8,1) {$\hat{y}(t|t-1)$};
+        \node[right] (xhat) at (8,0) {$\hat{x}(t|t-1)$};
+
+        \draw[<-] (F1) -- ++(-2, 0)
+            node[above, midway] (u_t) {$u(t)$};
+        \draw[->] (u_t.south) |- (F2.west);
+        \draw[->] (v1) -- (sum3);
+        \draw[->] (sum3) -- (z1) node[pos=0.5] {$x(t+1)$};
+        \draw[->] (F1) -- (sum3);
+        \draw[->] (z1) -- (H1) node[pos=0.5] {$x(t)$};
+        \draw[->] (H1) -- (sum1);
+        \draw[->] (v2) -- (sum1);
+        \draw[->] (2.5,4) |- (F1);
+        \draw[->] (K) -| (sum4);
+        \draw[->] (sum4) -- (z2) node[pos=0.5] {$\hat{x}(t+1|t)$};
+        \draw[->] (7,4) -- (sum2) node[pos=0.8] {$+$};
+        \draw[->] (sum1) -- (y);
+        \draw[<-] (K) -- (sum2) node[pos=0.5,black] {$e(t)$};
+        \draw[->] (z2) -- (H2) node[pos=0.5] {$\hat{x}(t|t-1)$};
+        \draw[->] (F2) -- (sum4);
+        \draw[->] (2.5,1) |- (F2);
+        \draw[->] (H2) -- (yhat);
+        \draw[->] (7,1) -- (sum2) node[pos=0.8] {$-$};
+        \draw[->] (F2) -- (xhat);
+
+        \draw[blue, dashed] ($(F2) + (-1,-1)$) rectangle ($(sum2) + (0.5, 0.5)$) 
+            node[above left = 1.7cm and 1cm of F2] {$\mathcal{EKF}$:};
+    \end{tikzpicture}
+\end{figure}
+
+For the gain block of \gls{kf} we have 2 different types of solutions:
+\begin{enumerate}
+    \item The gain is a non-linear function of $e(t)$ (most natural and intuitive solution)
+    \item The gain is a linear time-varying function
+\end{enumerate}
+
+The second solution is less intuitive but is the most effective: we can reuse most of the \gls{kf} theory's formulas for the LTV domain.
+
+In practice, the idea of the Extended Kalman Filter (EKF) is to make a time-varying local linearization (approximation) of the non-linear time-invariant system at each sampling time.
+
+The gain equation $K(t)$ in Extended Kalman Filter can be computed as:
+\[
+    K(t) = \left( F(t) P(t) H(t)^T + V_{12} \right) \left( H(t) P(t) H(t)^T + V_2 \right)^{-1}
+\]
+and $P(t)$ can be computed from the \gls{dre}:
+\[
+    P(t+1) = \left( F(t)P(t)F(t)^T+V_1 \right) - \left( F(t)P(t)H(t)^T + V_{12}\right)\left( H(t)P(t)H(t)^T + V_2\right)^{-1}\left( F(t)P(t)H(t)^T + V_{12}\right)^T
+\]
+
+\begin{rem}
+    Equations of $K$ and \gls{dre} are the usual formulas of \gls{kf} (\ref{subsec:KF-basic_sol}) with the introduction of time-varying parameters (as said in \ref{subsec:time-varying_extension}).
+\end{rem}
+
+\paragraph{Linearization of the system}
+We linearize $f$ and $h$ by computing at each sampling time, respectively, $F(t)$ and $H(t)$ as:
+
+\begin{align*}
+    F(t) &=  \left.\frac{ \partial f(x(t), u(t))}{\partial x(t)} \right|_{x(t) = \hat{x}(t|t-1)} \\
+    H(t) &= \left. \frac{\partial h(x(t))}{\partial x(t)} \right|_{x(t) = \hat{x}(t|t-1)}
+\end{align*}
+
+Therefore, EKF is the time-varying solution of \gls{kf} where $F(t)$ and $H(t)$ are local linearized matrices computed around the last available state prediction $\hat{x}(t|t-1)$.
+
+\paragraph{Summary}
+Procedure to implement EKF at time $t$:
+\begin{enumerate}
+    \item take last available state prediction $\hat{x}(t|t-1)$
+    \item using $\hat{x}(t|t-1)$, linearize the system by computing $F(t)$ and $H(t)$
+    \item compute $K(t)$ and update the \gls{dre}
+    \item compute $\hat{x}(t+1|t)$
+\end{enumerate}
+
+\begin{rem}
+    Main issues of EKF (the same as for LTV \gls{kf}):
+    \begin{itemize}
+        \item Very difficult (almost impossible) to have a theoretical guarantee of EKF stability (in practice extensive empirical testing is used).
+        \item Computational load (at each time $F(t)$, $H(t)$, $K(t)$ and $P(t)$ must be computed at run-time).
+    \end{itemize}
+
+    However, EKF is largely used today with some limitations in:
+    \begin{itemize}
+        \item safety-critical applications
+        \item mission-critical applications
+    \end{itemize}
+\end{rem}
+
+\begin{exa}[\gls{kf} full procedure]\label{ex:KF_full-proc}
+
+Suspended seat in the cabin of an off-highway vehicle (agriculture tractor, earth-moving machine, etc\dots).
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw[decoration={aspect=0.3, segment length=1.5mm, amplitude=3mm,coil},decorate] (-1,1) -- (-1,2);
+            \draw (0,2) -- (0,2.5);
+            \draw (-1,2) -- (1,2);
+            \draw (-1,1) -- (1,1);
+
+            \draw (1,2) -- (1,1.5);
+            \draw (0.8, 1.7) -- (0.8, 1.3) -- (1.2, 1.3) -- (1.2, 1.7);
+            \draw (1,1) -- (1,1.3);
+            \draw (0,0.7) -- (0,1);
+
+            \fill (-0.2,0.5) rectangle (0.2,0.7);
+            \draw (-1.5,2.5) -- (1.5,2.5) -- (1.5,5.5) -- (1.1,5.5) -- (1.1,2.9) -- (-1.5,2.9) -- (-1.5,2.5);
+
+            \draw[pattern=north east lines] (-3,0) rectangle (3,0.5);
+            \draw (-3,0.5) arc[radius=6,start angle=180, end angle=90] -- (3,0.5);
+
+            \draw[->] (0.4,0.55) -- (0.4,0.9) node[right,pos=0.5] {\footnotesize vertical acc.};
+            \draw[->] (-1.4,0.55) -- (-1.4,2.45) node[left,pos=0.5] {\footnotesize elongation};
+        \end{tikzpicture}
+    \end{figure}
+
+    The physical sensors are:
+    \begin{itemize}
+        \item Vertical accelerometer placed at the base of the cabin
+        \item Elongation sensor between the base of the cabin and the seat
+    \end{itemize}
+
+    \paragraph{Problem} Estimation of the seat vertical speed.
+
+    \paragraph{Step 1: System Modeling} Move from a pictorial representation to a schematic representation of it:
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \draw (-1,2.5) rectangle (1,3.5);
+            \draw[decoration={aspect=0.3, segment length=1.5mm, amplitude=3mm,coil},decorate] (-1,1) -- (-1,2);
+            \draw (0,2) -- (0,2.5);
+            \draw (-1,2) -- (1,2);
+            \draw (-1,1) -- (1,1);
+
+            \draw (1,2) -- (1,1.5);
+            \draw (0.8, 1.7) -- (0.8, 1.3) -- (1.2, 1.3) -- (1.2, 1.7);
+            \draw (1,1) -- (1,1.3);
+            \draw (0,0.7) -- (0,1);
+
+            \draw plot[smooth, tension=1] coordinates {(-2, 0) (0,0.7) (2,0)};
+
+            \fill [pattern=north east lines] (2.1,0) rectangle (2.7,-0.1);
+
+            \draw (2.1,0) -- (2.7,0);
+            \draw[dotted] (0,1) -- (2.2,1);
+            \draw[dotted] (0,2.5) -- (2.6,2.5);
+
+            \draw [->] (2.2,0) -- (2.2,1);
+            \draw [->] (2.6,0) -- (2.6,2.5);
+
+            \node at (0,3) {$M$};
+            \node[right] at (1.1,3.7) {\footnotesize full mass of seat+driver};
+
+            \node at (-1.7,1.5) {$K$};
+            \node[left] at (-1.4,1.8) {\footnotesize spring stiffness};
+
+            \node at (1.5,1.5) {$C$};
+            \node[right] at (1.3,1.8) {\footnotesize damping ratio};
+
+            \node[left] at (2.2,0.5) {$z_d$};
+            \node[right] at (2.6,1.25) {$z$};
+        \end{tikzpicture}
+    \end{figure}
+
+    The sensors are:
+    \begin{itemize}
+        \item Acceleration $\ddot{z}_d$ (+noise)
+        \item Elongation $z-z_d$ (+noise)
+    \end{itemize}
+
+    Model of the system dynamics (physical white-box model in continuous time domain).
+
+    Core model equation (force balance in vertical direction):
+    \begin{align*}
+        M\ddot{z} &= \underbrace{-C\frac{d}{dt}(z-z_d)}_{\text{damping}} \underbrace{-K(z-z_d)}_{\text{spring}} \cancel{+ \text{gravity} - Mg} \\
+                  &= -C(\dot{z} - \dot{z}_d) - K(z-z_d)
+    \end{align*}
+
+    Since we measure $\ddot{z}_d$ the overall dimension of the system is 4.
+    The vector of state variables is:
+    \[
+        x(t) = \begin{bmatrix}
+            z \\
+            \dot{z} \\
+            z_d \\
+            \dot{z}_d
+        \end{bmatrix} = \begin{bmatrix}
+            x_1(t) \\
+            x_2(t) \\
+            x_3(t) \\
+            x_4(t) \\
+        \end{bmatrix}
+        \qquad
+        u(t) = \ddot{z}_d
+        \qquad
+        y(t) = z-z_d
+    \]
+
+    We can write the full model in state space form
+    \[
+        \begin{cases}
+            \dot{x}_1 = x_2 \\
+            \dot{x}_2 = -\frac{C}{M} (x_2-x_4) - \frac{K}{M}(x_1-x_3) \\
+            \dot{x}_3 = x_4 \\
+            \dot{x}_4 = u \\
+            y = x_1-x_3
+        \end{cases}
+        \quad\Rightarrow\quad
+        \begin{cases}
+            \dot{x}_1 = x_2 \\
+            \dot{x}_2 = -\frac{K}{M}x_1 - \frac{C}{M}x_2 + \frac{K}{M}x_3 + \frac{C}{M} x_4 \\
+            \dot{x}_3 = x_4 \\
+            \dot{x}_4 = u \\
+            y = x_1-x_3
+        \end{cases}
+    \]
+    \[
+        A = \begin{bmatrix}
+            0 & 1 & 0 & 0 \\
+            -\frac{K}{M} & -\frac{C}{M} & \frac{K}{M} & \frac{C}{M} \\
+            0 & 0 & 0 & 1 \\
+            0 & 0 & 0 & 0
+        \end{bmatrix}
+        \quad
+        B = \begin{bmatrix}
+            0 \\ 0 \\ 0 \\ 1
+        \end{bmatrix}
+        \quad
+        C = \begin{bmatrix}
+            1 & 0 & -1 & 0
+        \end{bmatrix}
+    \]
+
+    \paragraph{Step 2: Discretization}
+    The next step is discretization (we use digital systems), which requires choosing a sampling time $\Delta$ (for this application it can be 5\,ms).
+
+    We use the \emph{forward Euler approximation} (see \nameref{appendix:discr}) of the time derivative: $\dot{x}(t) \approx \frac{x(t+1)-x(t)}{\Delta}$.
+
+    Example for the first equation:
+    \[
+        \frac{x_1(t+1)-x_1(t)}{\Delta} = x_2(t) \quad\Rightarrow\quad x_1(t+1) = x_1(t) + \Delta x_2(t)
+    \]
+
+    \paragraph{Step 3: Add Noises} In order to apply \gls{kf} formulas we need to add noises $v_1, v_2$ to the model. 
+    The discretized system with noises is:
+
+    \[  
+        \Sc:
+        \begin{cases}
+            x_1(t+1) = x_1(t) + \Delta x_2(t) + v_{11}(t) \\
+            x_2(t+1) = -\frac{\Delta K}{M} x_1(t) + \left( -\frac{\Delta C}{M} + 1 \right)x_2(t) + \frac{\Delta K}{M} x_3(t) + \frac{\Delta C}{M} x_4(t) + v_{12}(t) \\
+            x_3(t+1) = x_3(t) + \Delta x_4(t) + v_{13}(t) \\
+            x_4(t+1) = x_4(t) + \Delta u(t) + v_{14}(t) \\
+            y(t) = x_1(t) - x_3(t) + v_2(t)
+        \end{cases}
+    \]
+
+    This is a standard state-space model in discrete time:
+    \[  
+        \Sc:
+        \begin{cases}
+            x(t+1) = Fx(t) + Gu(t) \\
+            y(t) = Hx(t) + \cancelto{0}{D}u(t)
+        \end{cases}
+    \]
+    \[
+        F = \begin{bmatrix}
+            1 & \Delta & 0 & 0 \\
+            -\frac{\Delta K}{M} & -\frac{\Delta C}{M}+1 & \frac{\Delta K}{M} & \frac{\Delta C}{M} \\
+            0 & 0 & 1 & \Delta \\
+            0 & 0 & 0 & 1
+        \end{bmatrix}
+        \quad G = \begin{bmatrix}
+            0 \\ 0 \\ 0 \\ \Delta
+        \end{bmatrix}
+        \quad H = \begin{bmatrix}
+            1 & 0 & -1 & 0
+        \end{bmatrix}
+    \]
+
+    Noise on the state equations:
+    \[
+        v_1(t) = \begin{bmatrix}
+            v_{11}(t) \\
+            v_{12}(t) \\
+            v_{13}(t) \\
+            v_{14}(t)
+        \end{bmatrix}
+        \sim WN(0, V_1)
+        \qquad
+        V_1 = \begin{bmatrix}
+            \lambda_1^2 & 0 & 0 & 0\\
+            0 & \lambda_2^2 & 0 & 0 \\
+            0 & 0 & \lambda_3^2 & 0 \\
+            0 & 0 & 0 & \lambda_4^2 \\
+        \end{bmatrix}
+        \qquad
+        v_2(t) \sim WN(0, V_2)
+    \]
+    Assumptions:
+    \begin{itemize}
+        \item All white-noises
+        \item All uncorrelated
+    \end{itemize}
+
+    $V_2$ can be estimated from the datasheet of the elongation sensor, $\lambda_4$ can be estimated from the datasheet of the accelerometer.
+    We expect $\lambda_1$, $\lambda_2$ and $\lambda_3$ to be small, and we can use the simplifying assumption $\lambda_1^2=\lambda_2^2=\lambda_3^2 =\lambda^2$, estimated empirically.
+
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \node[left] (u) at (0,0) {$u(t)$};
+            \node[block] (d1) at (1,0) {$\Delta$};
+            \node[sum] (s1) at (2,0) {};
+            \node[above] (v14) at (2,0.5) {$v_{14}(t)$};
+            \node[block] (z4) at (4,0) {$z^{-1}$};
+            \node[block] (b4) at (6,1) {$\frac{\Delta C}{M}$};
+            \node[block] (d2) at (4,1) {$\Delta$};
+
+            \node[sum] (s2) at (2,2.5) {};
+            \node[above] (v13) at (2,3) {$v_{13}(t)$};
+            \node[block] (z3) at (4,2.5) {$z^{-1}$};
+            \node[block] (b3) at (6,4.5) {$\frac{\Delta K}{M}$};
+
+            \node[sum] (s3) at (7.5,4.5) {};
+            \node[above] (v12) at (7.5,5) {$v_{12}(t)$};
+            \node[block] (z2) at (9.5,4.5) {$z^{-1}$};
+            \node[block] (b2) at (9.5,3.5) {$-\frac{\Delta C}{M}+1$};
+
+            \node[sum] (s4) at (2,8) {};
+            \node[above] (v11) at (2,8.5) {$v_{11}(t)$};
+            \node[block] (z1) at (4,8) {$z^{-1}$};
+            \node[block] (b1) at (6,5.5) {$-\frac{\Delta K}{M}$};
+            \node[block] (d3) at (4,6.5) {$\Delta$};
+
+            \node[sum] (s5) at (11.5,2.5) {};
+            \node[below] (v2) at (11.5,2) {$v_2(t)$};
+            \node[right] (y) at (12,2.5) {$y(t)$};
+
+            \draw[->] (u) -- (d1);
+            \draw[->] (d1) -- (s1);
+            \draw[->] (v14) -- (s1);
+            \draw[->] (s1) -- (z4) node[pos=0.5] {\footnotesize $x_4(t+1)$};
+            \draw[->] (z4) -- (8,0) node[above] {\footnotesize $x_4(t)$};
+            \draw[->] (4.7,0) -- (4.7,-0.5) -- (2,-0.5) -- (s1);
+            \draw[->] (4.7,0) |- (d2);
+            \draw[->] (6,0) -- (b4);
+
+            \draw[->] (d2) -| (s2);
+            \draw[->] (v13) -- (s2);
+            \draw[->] (s2) -- (z3) node[pos=0.5] {\footnotesize $x_3(t+1)$};
+            \draw[->] (z3) -- (s5) node[pos=0.1] {\footnotesize $x_3(t)$};
+            \draw[->] (4.7,2.5) -- (4.7,2) -- (2.5,2) -- (s2);
+            \draw[->] (6,2.5) -- (b3);
+
+            \draw[->] (b3) -- (s3);
+            \draw[->] (v12) -- (s3);
+            \draw[->] (b4) -- (7,1) -- (7,4) -- (s3);
+            \draw[->] (b1) -- (7,5.5) -- (7,5) -- (s3);
+            \draw[->] (s3) -- (z2) node[pos=0.5] {\footnotesize $x_2(t+1)$};
+            \draw[->] (z2) -- (11,4.5) node[above] {\footnotesize $x_2(t)$};
+            \draw[->] (10.5,4.5) |- (b2);
+            \draw[->] (b2) -| (s3);
+
+            \draw[->] (v11) -- (s4);
+            \draw[->] (s4) -- (z1) node[pos=0.5] {\footnotesize $x_1(t+1)$};
+            \draw[->] (10.5,4.5) |- (d3);
+            \draw[->] (d3) -| (s4);
+            \draw[->] (4.7,8) -- (4.7,7.5) -- (2.5,7.5) -- (s4);
+            \draw[->] (z1) -| (s5) node[pos=0.05] {\footnotesize $x_1(t)$};
+            \draw[->] (6,8) -- (b1);
+
+            \draw[->] (v2) -- (s5);
+            \draw[->] (s5) -- (y);
+        \end{tikzpicture}
+    \end{figure}
+
+    We can compute
+    \[
+        O = \begin{bmatrix}
+            H \\
+            HF \\
+            HF^2 \\
+            HF^3
+        \end{bmatrix}
+        \qquad
+        R = \begin{bmatrix}
+            G & FG & F^2G & F^3G
+        \end{bmatrix}
+    \]
+    From visual inspection of the block scheme we expect full observability from the output and full controllability from the input.
+
+    \gls{kf} at this point can be applied.
+    Theorems \ref{th:1KF_as} and \ref{th:2KF_as} are applicable, thus we can directly jump to the ARE solution.
+\end{exa}    
diff --git a/lectures/2022_05_09.tex b/lectures/2022_05_09.tex
new file mode 100644
index 0000000..b55d122
--- /dev/null
+++ b/lectures/2022_05_09.tex
@@ -0,0 +1,505 @@
+%!TEX root = ../main.tex
+
+% --- Postponed chapters ---
+\chapter{Software-sensing with Black-Box Methods}
+\input{lectures/2022_05_17}
+
+\chapter{Gray-Box System Identification}\label{ch5}
+\input{lectures/2022_05_23}
+% -----------------------------
+
+\chapter{Minimum Variance Control}
+Minimum Variance Control (MVC) is about design and analysis of feedback systems, it is not about system identification nor software sensing.
+
+Why do we dedicate a chapter to \emph{control}?
+\begin{itemize}
+    \item Control design is the main motivation to system identification and software sensing.
+    \item MVC is based on \emph{mathematics} of system identification and software sensing (prediction theory).
+    \item MVC can be considered as a general tool of stochastic optimization of feedback systems.
+\end{itemize}
+
+\section{MVC System}
+
+\paragraph{Setup of the problem} Consider a generic \gls{armax} model
+
+\begin{rem}[\gls{armax} system]
+	\[
+	    y(t) = \frac{B(z)}{A(z)}u(t-k) + \frac{C(z)}{A(z)}e(t) \qquad e(t) \sim WN(0, \lambda^2)
+	\]
+	\begin{align*}
+	    B(z) &= b_0 + b_1z^{-1} + \dots + b_pz^{-p} \\
+	    A(z) &= 1   + a_1z^{-1} + \dots + a_mz^{-m} \\
+	    C(z) &= 1   + c_1z^{-1} + \dots + c_nz^{-n}
+	\end{align*}
+
+	\textbf{Note} The input is $u(t)$ and the output is $y(t)$. The noise $e(t)$ is a non-measurable input that models the uncertainty of the system.  
+\end{rem}
+
+\subparagraph{Assumptions}
+\begin{itemize}
+    \item $\frac{C(z)}{A(z)}$ is in \emph{canonical form}
+    \item $b_0\ne 0$ (this guarantees that $k \ge 1$ is the actual \emph{pure delay}: if $b_0$ were zero, the input--output delay would be greater than $k$)
+    \item $\frac{B(z)}{A(z)}$ is \emph{minimum phase}
+\end{itemize}
+
+\begin{rem}[Transfer function in canonical form]
+    A transfer function $W(z) = \frac{C(z)}{A(z)}$ is in \emph{canonical form} if 
+    \begin{enumerate}
+        \item $C(z)$ and $A(z)$ are monic (i.e. the coefficients of the maximum degree terms of $C(z)$ and $A(z)$ are equal to 1);
+        \item $C(z)$ and $A(z)$ have null relative degree (i.e. they share the same degree);
+        \item $C(z)$ and $A(z)$ are coprime (i.e. they have no common factors);
+        \item[4a.]the poles of $W(z)$ are such that $|z| < 1$;
+        \item[4b.]the zeros of $W(z)$ are such that $|z| < 1$. (more stringent condition than the one we saw in MIDA1)
+    \end{enumerate}
+\end{rem}
+
+\begin{rem}[Minimum Phase filter]
+    A filter described by a \gls{tf} $\frac{B(z)}{A(z)}$ is said to be \emph{minimum phase} if all the roots of $B(z)$ are strictly inside the unit circle.
+\end{rem}
+
+\begin{rem}[Minimum Phase filter in practice]
+    What does it mean in practice?
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+            \draw[->] (0,4) -- (0,7) node[left] {$u(t)$};
+            \draw[->] (0,4) -- (5,4) node[below] {$t$};
+            \draw[->] (0,0) -- (0,3) node[left] {$y(t)$};
+            \draw[->] (0,0) -- (5,0) node[below] {$t$};
+
+            \draw[line width=0.3mm] (0,4) -- (1,4) -- (1,6) -- (5,6);
+            \draw[dotted] (1,0) -- (1,4);
+            \draw[dotted] (0,2.5) -- (5,2.5) node[right] {\footnotesize steady state};
+
+            \draw[domain=1:4.5,smooth,variable=\x,green,samples=70] plot ({\x},{2.5-2.5*(1-(\x-1)/3.5)^5});
+            \draw[domain=1:4.5,smooth,variable=\x,green,samples=70] plot ({\x},{2.5-2.5*(1-(\x-1)/3.5)^5+sin(\x*180)/\x});
+            \draw[domain=1:4.5,smooth,variable=\x,red,samples=70] plot ({\x},{2.5-2.5*(1-(\x-1)/3.5)^5+3*sin(\x*180)/(\x^2)});
+
+            \node[red,right] at (1.5,-0.5) {\footnotesize non-minimum phase};
+            \node[green,right] at (2,1.5) {\footnotesize minimum phase};
+        \end{tikzpicture}
+    \end{figure}
+
+    At the very beginning the response of a \textcolor{red}{non-minimum phase} system goes in the opposite direction w.r.t. the final value. Intuitively it's very difficult to control non-minimum phase systems: you can make the wrong decision if you react immediately.
+
+    It is difficult for humans too; consider, for example, the \emph{steer to roll} dynamics of a bicycle: if you want to steer left, you must first steer a little to the right and then turn left.
+
+    Design of controller for non-minimum phase is difficult and requires special design techniques (no MVC but \emph{generalized MVC (GMVC)}, described in \ref{subsec:GMVC}).
+\end{rem}
+
+\paragraph{Goal of the problem} 
+The problem we wish to solve is the optimal tracking of the desired behavior of the output (which is the classical goal of control systems):
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block,ellipse,align=center] at (-1,0) (cont) {controller\\algorithm};
+        \node[block] at (2.5,0) (ba) {$\frac{B(z)}{A(z)}z^{-k}$};
+        \node[block] at (4,1.5) (ca) {$\frac{C(z)}{A(z)}$};
+        \node[sum] at (4,0) (sum) {};
+
+        \draw[dotted] (0.8,-1) rectangle (5,3.2) node[right] {system to be controlled};
+        \draw[->] (cont) -- (ba) node[pos=0.7] {$u(t)$};
+        \draw[->] (ba) -- (sum);
+        \draw[->] (ca) -- (sum);
+        \draw[<-] (cont) -- ++(-2,0) node[left] {$y^0(t)$};
+        \draw[<-] (ca) -- ++(0,1) node[above] {$e(t)$};
+        \draw[->] (sum) -- ++(2,0) node[right] {$y(t)$};
+        \draw[->] (5.5,0) -- (5.5,-1.5) -- (-1,-1.5) -- (cont);
+    \end{tikzpicture}
+\end{figure}
+where $y^0(t)$ is the desired output value, called \emph{reference}.
+
+Some additional (small) technical \textbf{assumptions}:
+\begin{itemize}
+    \item $y^0(t)$ and $e(t)$ are not correlated (usually fulfilled).
+    \item We assume (worst case) that $y^0(t)$ is known only up to time $t$ (present time): we have no preview of the future desired $y^0(t)$ (i.e. $y^0(t)$ is totally unpredictable or $\hat{y^0}(t+k|t) = y^0(t)$).
+\end{itemize} 
+
+In a more formal way, MVC is an optimization control problem that tries to find the $u(t)$ that minimizes this performance index:
+\[
+    J = E\left[ (y(t) - y^0(t))^2 \right]
+\]
+
+where $J$ is the variance of the tracking error: that's why it's called Minimum Variance Control.
+
+\paragraph{Solution of the problem}
+The main trick is to split $y(t)$ into $\hat{y}(t|t-k)$, the \emph{predictor}, and $\epsilon(t)$, the \emph{prediction error}.
+
+Now we can write $J$ as 
+\begin{align*}
+	J =& \EE\left[ \left( \hat{y}(t|t-k) + \epsilon(t) - y^0(t) \right)^2 \right] \\
+	  =& \EE\left[ \left( (\hat{y}(t|t-k) - y^0(t)) + \epsilon(t) \right)^2 \right] \\
+	  =& \EE\left[ \left( \hat{y}(t|t-k) - y^0(t) \right)^2 \right] + \EE[\epsilon(t)^2 ] + 2 \EE\left[ \epsilon(t) \left( \hat{y}(t|t-k) - y^0(t) \right) \right]
+\end{align*}
+
+where we note that
+
+\begin{itemize}
+	\item $\EE[\epsilon(t)^2 ]$ doesn't depend on $u(t)$: it's not subject to minimization, since it's just a constant number.
+	\item $\EE[ \epsilon(t) y^0(t) ] = 0$, since $\epsilon(t) = f(e(t), e(t-1), \dots) $ and, by assumption, $y^0 \perp e$.
+	\item $\EE[ \epsilon(t) \hat{y}(t|t-k) ] = 0$, since by construction $\epsilon(t) \perp \hat{y}(t|t-k)$ (otherwise $\hat{y}(t|t-k)$ wouldn't be the optimal predictor).
+\end{itemize}
+
+Therefore, minimizing $J$ is equivalent to minimizing $\EE\left[ \left( \hat{y}(t|t-k) - y^0(t) \right)^2 \right]$, which is minimized if
+
+\begin{align}\label{eq:min_J_condition}
+	\hat{y}(t|t-k) = y^0(t)
+\end{align}
+
+Now we have to compute $\hat{y}(t|t-k)$ and impose $\hat{y}(t+k|t) = y^0(t+k)$ (obtained by shifting the equation \ref{eq:min_J_condition} using the $z$ operator).
+
+However, by assumption, at time $t$ we don't know $y^0(t+k)$: the best we can do is to replace it with the last available value of $y^0$, that is $y^0(t)$.
+
+Therefore, the condition \ref{eq:min_J_condition} becomes 
+
+\begin{align}\label{eq:new_min_J_condition}
+	\hat{y}(t+k|t) = y^0(t)
+\end{align}
+
+\begin{rem}[$k$-step predictor of an \gls{armax} system]
+
+	\[
+		\hat{y}(t+k|t) = \frac{B(z) E(z)}{C(z)} u(t) + \frac{\tilde{R}(z)}{C(z)} y(t)
+	\] 
+
+	where $E(z)$ is the \emph{result} (quotient) and $R(z) = \tilde{R}(z) z^{-k}$ is the \emph{residual} (remainder) of the $k$-step long division between $C(z)$ and $A(z)$.
+
+\end{rem}
+
+By plugging in the $k$-step predictor of an \gls{armax} system formula in the equation \ref{eq:new_min_J_condition} we obtain
+
+\[
+	\frac{B(z) E(z)}{C(z)} u(t) + \frac{\tilde{R}(z)}{C(z)} y(t) = y^0(t)
+\]
+
+and by making $u(t)$ explicit we obtain the \emph{General Formula of MVC}:
+
+\begin{align}\label{eq:MVC_general-formula}
+	u(t) = \frac{1}{B(z)E(z)} \left( C(z) y^0(t) - \tilde{R}(z) y(t) \right)
+\end{align}
+
+The block scheme representation of the MVC system is
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[sum] at (0,0) (sum) {};
+        \node[block, left=1cm of sum] (c1) {$C(z)$}; 
+        \node[block] at (1.5,0) (b1) {$\frac{1}{B(z) E(z)}$};
+        \node[block] at (5,0) (b2) {$\frac{B(z)}{A(z)} z^{-k}$};
+        \node[block] at (3,-1.5) (b3) {$\tilde{R}(z)$};
+        \node[block] at (7,1.5) (b4) {$\frac{C(z)}{A(z)}$};
+        \node[sum] at (7,0) (sum2) {};
+
+        \draw[<-] (c1) --++ (-1.5,0) {} node[left] (in) {$y^0(t)$};
+        \draw[->] (sum) -- (b1);
+        \draw[->] (b1) -- (b2) node[pos=0.5] {$u(t)$};
+        \draw[->] (b3) -| (sum) node[pos=0.9] {$-$};
+        \draw[->] (c1) -- (sum)  node[above, near end] {$+$};
+        \draw[->] (sum2) -- ++(1.5,0) node[right] {$y(t)$};
+        \draw[->] (b2) -- (sum2);
+        \draw[->] (b4) -- (sum2);
+        \draw[<-] (b4) -- ++(0,1) node[above] (noise_in) {$e(t)$};
+        \draw[->] (8,0) |- (b3);
+
+        \draw[dashed, blue] ($(in) + (0.9, -2.5)$) rectangle ($(b2) + (-1.2, 1)$) node[above=0.6cm of c1] {\text{MVC (controller)}};
+        \draw[dashed] ($(b2) + (-1, -0.6)$) rectangle ($(noise_in) + (0.7, 0.3)$) node[right] {\text{system}};
+    \end{tikzpicture}
+
+    \caption{MVC System}
+    \label{fig:MVC_sys}
+\end{figure}
+
+\clearpage
+
+\section{Analysis of the MVC System}
+
+\subsection{Stability of the MVC System}\label{subsec:MVC_stability}
+
+\begin{rem}
+    For stability let's recall a result of \emph{negative feedback system}:
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+            \node[block] at (0,1.5) (f1) {$F_1(z)$};
+            \node[block] at (0,0.5) (f2) {$F_2(z)$};
+            \node[sum] at (-1.5,1.5) (sum) {};
+
+            \draw[<-] (sum) -- ++(-1,0) node[pos=0.2, above] {$+$};
+            \draw[->] (f2) -| (sum) node[pos=0.9] {$-$};
+            \draw[->] (sum) -- (f1);
+            \draw[->] (f1) -- ++(2,0);
+            \draw[->] (1.5,1.5) |- (f2);
+        \end{tikzpicture}
+    \end{figure}
+
+    To check the closed-loop stability:
+    \begin{itemize}
+        \item compute the \emph{loop-function} $L(z) = F_1(z) F_2(z)$ (\textbf{remember}: do not simplify!)
+        \item build the \emph{characteristic polynomial} $\chi(z) = L_N(z) + L_D(z)$ (sum of the numerator and the denominator of $L(z)$)
+        \item find the roots of $\chi(z)$: closed loop system is asymptotically stable if and only if all the roots of $\chi(z)$ are strictly inside the unit circle
+    \end{itemize}
+\end{rem}
+
+Check the stability of the MVC system represented in figure \ref{fig:MVC_sys}
+
+\[
+    L(z) = \frac{1}{B(z)E(z)}\cdot \frac{z^{-k}B(z)}{A(z)}\cdot\tilde{R}(z)
+\]
+\begin{align*}
+    \chi(z) &= z^{-k}B(z)\tilde{R}(z) + B(z)E(z)A(z) \\
+    &= B(z) \underbrace{\left( z^{-k}\tilde{R}(z)+E(z)A(z) \right)}_{C(z)}  \\
+    &= B(z)C(z)
+\end{align*}
+
+where we used the following result of the long division between $C(z)$ and $A(z)$:
+
+\[ \frac{C(z)}{A(z)} = E(z) + \frac{z^{-k} \tilde{R}(z)}{A(z)} \]
+
+The MVC system is always guaranteed to be asymptotically stable, since the roots of $\chi(z)$ are the roots of $B(z)$ and $C(z)$ and:
+\begin{itemize}
+    \item all roots of $B(z)$ are strictly inside the unit circle (thanks to minimum phase assumption on $\frac{B(z)}{A(z)}$)
+    \item all roots of $C(z)$ are strictly inside the unit circle (thanks to the assumption of canonical representation of $\frac{C(z)}{A(z)}$)
+\end{itemize}
+
+\subsection{Performance of the MVC System}\label{subsec:MVC_performance}
+
+The system can be rewritten as follows considering the two inputs: $y^0(t)$ and $e(t)$ (non-measurable input)
+\[ 
+	y(t) = F_{y^0y}(z) y^0(t) + F_{ey}(z) e(t)
+\]
+
+\begin{rem}
+    The transfer function from the input to the output of a \emph{negative feedback system} can be computed with:
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+            \node[block] at (0,1.5) (f1) {$F_1(z)$};
+            \node[block] at (0,0.5) (f2) {$F_2(z)$};
+            \node[sum] at (-1.5,1.5) (sum) {};
+
+            \draw[<-] (sum) -- ++(-1,0) node[left] {$u(t)$} node[pos=0.2, above] {$+$} ;
+            \draw[->] (f2) -| (sum) node[pos=0.9] {$-$};
+            \draw[->] (sum) -- (f1);
+            \draw[->] (f1) -- ++(2,0) node[right] {$y(t)$};
+            \draw[->] (1.5,1.5) |- (f2);
+        \end{tikzpicture}
+    \end{figure}
+
+    \[ 
+    	F(z) = \frac{F_1(z)}{1 + F_1(z) F_2(z)} \qquad y(t) = F(z) u(t)
+    \]
+    where we recall that $F_1(z) F_2(z)$ is the loop-function $L(z)$
+ 	and $F_1(z)$ is the direct line from the input to the output.
+\end{rem}
+
+Therefore, looking at the figure \ref{fig:MVC_sys}, the \gls{tf} from $y^0(t)$ to $y(t)$ is
+\begin{align*}
+	F_{y^0y}(z) =& \frac{C(z) \cdot \frac{1}{B(z) E(z)} \cdot \frac{z^{-k} B(z)}{A(z)}}{1 + \underbrace{\frac{1}{B(z)E(z)}\cdot \frac{z^{-k}B(z)}{A(z)}\cdot\tilde{R}(z)}_{L(z)}} \\
+	=& \dots \\
+	=& z^{-k}
+\end{align*}
+where we recall that $L(z)$ has already been computed for the stability check (\ref{subsec:MVC_stability}).
+
+Similarly, the \gls{tf} from $e(t)$ to $y(t)$ is 
+\begin{align*}
+	F_{ey}(z) =& \frac{\frac{C(z)}{A(z)}}{1 + \underbrace{\frac{1}{B(z)E(z)}\cdot \frac{z^{-k}B(z)}{A(z)}\cdot\tilde{R}(z)}_{L(z)}} \\
+	=& \dots \\
+	=& E(z)
+\end{align*}
+
+Thus we can say that
+
+\begin{align*}
+	y(t) =& F_{y^0y}(z) y^0(t) + F_{ey}(z) e(t) \\
+		 =& z^{-k} y^0(t) + E(z) e(t) \\
+         =& y^0(t-k) + E(z) e(t)
+\end{align*}
+ 
+which is the very simple closed-loop relationship between input and output in an MVC system.
+
+% \begin{rem}
+%     There are 2 sub-classes of control problems:
+%     \begin{itemize}
+%         \item When $y^0(t)$ is constant or step-wise (regulation problem)
+%         \item When $y^0(t)$ is varying (tracking problem)
+%     \end{itemize}
+
+%     \begin{figure}[H]
+%         \centering
+%         \begin{minipage}[t]{0.48\textwidth}
+%             \centering
+%             \begin{tikzpicture}[
+%                 node distance=2cm,auto,>=latex',
+%                 declare function={
+%                     f(\x) =  (\x < 0.5) * 1 +
+%                              (\x >= 0.5) * (\x < 2) * 2 +
+%                              (\x >= 2) * (\x < 3) * 3 +
+%                              (\x >= 3) * (\x < 4) * 1.5 +
+%                              (\x >= 4) * 1;
+%                     f2(\x) = (f(\x-0.5) + (f(\x) - f(\x-0.5)) / 720 +
+%                              f(\x-0.4) + (f(\x) - f(\x-0.4)) / 120 +
+%                              f(\x-0.3) + (f(\x) - f(\x-0.3)) / 24 +
+%                              f(\x-0.2) + (f(\x) - f(\x-0.2)) / 6 +
+%                              f(\x-0.1) + (f(\x) - f(\x-0.1)) / 2) / 5 +
+%                              rand/8;
+%                 }
+%             ]
+%                 \draw[->] (0,0) -- (0,3) node[above] {$y(t)$};
+%                 \draw[->] (0,0) -- (5,0) node[below] {$t$};
+%                 \draw[domain=0:5,variable=\x,blue,samples=100] plot ({\x},{f(\x)});
+%                 \draw[domain=0:5,variable=\x,red,smooth,samples=100] plot ({\x},{f2(\x)});
+%             \end{tikzpicture}
+%             \caption*{Regulation problem}
+%         \end{minipage}
+%         \begin{minipage}[t]{0.48\textwidth}
+%             \centering
+%             \begin{tikzpicture}[
+%                 node distance=2cm,auto,>=latex',
+%                 declare function={
+%                     f(\x) =  (sin(\x*180)/2+sin(\x*270)/2)+1.5;
+%                     f2(\x) = (f(\x-0.3) + (f(\x) - f(\x-0.3)) / 24 +
+%                              f(\x-0.2) + (f(\x) - f(\x-0.2)) / 6 +
+%                              f(\x-0.1) + (f(\x) - f(\x-0.1)) / 2) / 3 +
+%                              rand/8;
+%                 }
+%             ]
+%                 \draw[->] (0,0) -- (0,3) node[above] {$y(t)$};
+%                 \draw[->] (0,0) -- (5,0) node[below] {$t$};
+%                 \draw[domain=0:5,variable=\x,blue,samples=70] plot ({\x},{f(\x)});
+%                 \draw[domain=0:5,variable=\x,red,smooth,samples=50] plot ({\x},{f2(\x)});
+%             \end{tikzpicture}
+%             \caption*{Tracking problem}
+%         \end{minipage}
+%     \end{figure}
+% \end{rem}
+
+% Bottom-up way of presenting M.V.C.
+
+% \paragraph{Simplified problem \#1}
+% \[
+%     S: y(t) = ay(t-1) + b_0u(t-1) + b_1u(t-2) \qquad y(t) = \frac{b_0+b_1z^{-1}}{1-az^{-1}}u(t-1)
+% \]
+
+% We assume that $y^0(t)=\overline{y}^0$ (regulation problem) and the system is noise-free.
+% \begin{itemize}
+%     \item $b_0\ne 0$
+%     \item Root of numerator must be inside the unit circle
+% \end{itemize}
+
+% To design the minimum variance controller we must minimize the performance index:
+% \[
+%     J = E\left[ (y(t) - y^0(t))^2 \right]
+% \]
+% There is no noise so we can remove the expected value
+% \begin{align*}
+%     J &= \left( y(t) - y^0(t) \right)^2 = \left( y(t) - \overline{y}^0 \right)^2 = \left( ay(t-1)+b_0u(t-1)+b_1u(t-2) - \overline{y}^0 \right)^2 = \\
+%     &= \left( ay(t) + b_0u(t) + b_1u(t-1)-\overline{y}^0 \right)^2 \\
+%     \frac{\partial J}{\partial u(t)} &= 2\left( ay(t)+b_0u(t)+b_1u(t-1)-\overline{y}^0 \right)\left(b_0\right)
+% \end{align*}
+
+% Why the derivative is just $b_0$? We are at present time $t$ and at time $t$ the control algorithm must take a decision on the value of $u(t)$.
+% At time $t$, $y(t)$, $y(t-1)$, \dots, $u(t-1)$, $u(t-2)$, \dots{} are no longer variables but numbers.
+
+% \[
+%     \frac{\partial J}{\partial u(t)} = 0 \qquad ay(t)+b_0u(t)+b_1u(t-1)-\overline{y}^0 = 0 \\
+% \]
+% \[
+%     u(t) = \left( \overline{y}^0  - ay(t)\right)\frac{1}{b_0+b_1z^{-1}}
+% \]
+% \begin{figure}[H]
+%     \centering
+%     \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+%         \node[sum] at (0,0) (sum) {};
+%         \node[block] at (1.5,0) (b1) {$\frac{1}{b_0+b_1z^{-1}}$};
+%         \node[block] at (5,0) (b2) {$z^{-1}\frac{b_0+b_1z^{-1}}{1-az^{-1}}$};
+%         \node[block] at (3,-1.5) (b3) {$a$};
+
+%         \draw[->] (sum) -- (b1);
+%         \draw[->] (b1) -- (b2) node[pos=0.5] {$u(t)$};
+%         \draw[->] (b3) -| (sum) node[pos=0.9] {-};
+%         \draw[<-] (sum) -- ++(-1.5,0) node[left] {$\overline{y}^0$} node[pos=0.2] {+};
+%         \draw[->] (b2) -- ++(2.5,0) node[right] {$y(t)$};
+%         \draw[->] (6.5,0) |- (b3);
+%     \end{tikzpicture}
+% \end{figure}
+
+% \paragraph{Simplified problem \#2}
+% \[
+%     S: y(t) = ay(t-1) + b_0u(t-1) + b_1u(t-2) + e(t) \qquad e(t) \sim WN(0, \lambda^2)
+% \]
+
+% The reference variable is $y^0(t)$ (tracking problem).
+
+% The performance index is
+% \[
+%     J = E\left[ (y(t) - y^0(t))^2 \right]
+% \]
+
+% The fundamental trick to solve this problem is to re-write $y(t)$ as:
+% \[
+%     y(t) = \hat{y}(t|t-1) + \epsilon(t)
+% \]
+
+% Since $k=1$ we know that $\epsilon(t) = e(t)$, so $y(t) = \hat{y}(t|t-1)+e(t)$.
+% \begin{align*}
+%     J &= E\left[ \left(\hat{y}(t|t-1)+e(t) - y^0(t)\right)^2 \right] \\
+%     &= E\left[   \left((\hat{y}(t|t-1)-y^0(t)) +e(t)\right)^2 \right] \\
+%     &= E\left[ \left(\hat{y}(t|t-1)-y^0(t)\right)^2 \right] + E\left[e(t)^2\right] + \cancel{2E\left[e(t)\left( \hat{y}(t|t-1)-y^0(t) \right)\right]} \\
+% \end{align*}
+
+% Notice that
+% \[
+%     \argmin_{u(t)} \left\{ E\left[ \left(\hat{y}(t|t-1)-y^0(t)\right)^2 \right] + \lambda^2 \right\} = \argmin_{u(t)} \left\{ E\left[ \left(\hat{y}(t|t-1)-y^0(t)\right)^2 \right] \right\}
+% \]
+% The best result is when $\hat{y}(t|t-1)=y^0(t)$, we can force this relationship.
+
+% Now we must compute the 1-step predictor of the system:
+% \[
+%     S: y(t) = \frac{b_0+b_1z^{-1}}{1-az^{-1}}u(t-1) + \frac{1}{1-az^{-1}}e(t)
+% \]
+% Note that this is an $ARMAX(1,0,1+1)=ARX(1,2)$.
+% \[
+%     k=1 \qquad B(z) = b_0+b_1z^{-1} \qquad A(z)=1-az^{-1} \qquad C(z) = 1
+% \]
+
+% General solution for 1-step prediction of ARMAX:
+% \[
+%     \hat{y}(t|t-1) = \frac{B(z)}{C(z)}u(t-1) + \frac{C(z)-A(z)}{C(z)}y(t)
+% \]
+% If we apply this formula we obtain:
+% \[
+%     \hat{y}(t|t-1) = \frac{b_0+b_1z^{-1}}{1}u(t-1) + \frac{1-1+az^{-1}}{1}y(t) = (b_0+b_1z^{-1})u(t-1)+ay(t-1)
+% \]
+
+% Now we can impose that $\hat{y}(t|t-1)=y^0(t)$
+% \[
+%     b_0u(t) + b_1u(t-1) + ay(t) = y^0(t+1) \qquad u(t) = \left( y^0(t+1) - ay(t) \right)\frac{1}{b_0+b_1z^{-1}}
+% \]
+% But we don't have $y^0(t+1)$, so we use $y^0(t)$.
+% \[
+%     u(t) = \left( y^0(t) - ay(t) \right)\frac{1}{b_0+b_1z^{-1}}
+% \]
+% \begin{figure}[H]
+%     \centering
+%     \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+%         \node[sum] at (0,0) (sum) {};
+%         \node[block] at (1.5,0) (b1) {$\frac{1}{b_0+b_1z^{-1}}$};
+%         \node[block] at (5,0) (b2) {$z^{-1}\frac{b_0+b_1z^{-1}}{1-az^{-1}}$};
+%         \node[block] at (3,-1.5) (b3) {$a$};
+%         \node[block] at (7,1.5) (b4) {$\frac{1}{1-az^{-1}}$};
+%         \node[sum] at (7,0) (sum2) {};
+
+%         \draw[->] (sum) -- (b1);
+%         \draw[->] (b1) -- (b2) node[pos=0.5] {$u(t)$};
+%         \draw[->] (b3) -| (sum) node[pos=0.9] {-};
+%         \draw[<-] (sum) -- ++(-1.5,0) node[left] {$y^0(t)$} node[pos=0.2] {+};
+%         \draw[->] (sum2) -- ++(1.5,0) node[right] {$y(t)$};
+%         \draw[->] (b2) -- (sum2);
+%         \draw[->] (b4) -- (sum2);
+%         \draw[<-] (b4) -- ++(0,1) node[above] {$e(t)$};
+%         \draw[->] (8,0) |- (b3);
+%     \end{tikzpicture}
+% \end{figure}
+
diff --git a/lectures/2022_05_12.tex b/lectures/2022_05_12.tex
new file mode 100644
index 0000000..941e312
--- /dev/null
+++ b/lectures/2022_05_12.tex
@@ -0,0 +1,699 @@
+%!TEX root = ../main.tex
+
+\externaldocument{2022_05_09}
+
+\begin{rem}
+    The closed-loop behavior is very simple. Visually:
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+            \node[block] at (0,1.5) (c) {$C(z)$};
+            \node[sum] at (1,1.5) (s1) {};
+            \node[block, right=0.3cm of s1] (b1) {$\frac{1}{B(z) E(z)}$};
+            \node[block, right=0.7cm of b1] (b2) {$\frac{z^{-k} B(z)}{A(z)}$};
+            \node[sum, right=0.5cm of b2] (s2) {};
+            \node[block] at (2,0) (b3) {$\tilde{R}(z)$};
+            \node[block, above=0.5cm of s2] (b4) {$\frac{C(z)}{A(z)}$};
+
+            \draw[<-] (c) -- ++(-1,0) node[left] {$y^0(t)$};
+            \draw[->] (c) -- (s1) node[pos=0.8] {+};
+            \draw[->] (s1) -- (b1);
+            \draw[->] (b1) -- (b2) node[above, midway] {$\scriptstyle u(t)$};
+            \draw[->] (b2) -- (s2);
+            \draw[->] (b3) -| (s1) node[pos=0.9] {-};
+            \draw[->] (b4) -- (s2);
+            \draw[->] (s2) -- ++(1,0) node[right] {$y(t)$};
+            \draw[<-] (b4) -- ++(0,1) node[above] (in_noise_1) {$e(t)$};
+            \draw[->] ($(s2) + (0.5,0)$) |- (b3);
+
+            \node[block] at (11,1.5) (z) {$z^{-k}$};
+            \node[block] at (12,3) (e) {$E(z)$};
+            \node[sum] at (12,1.5) (s3) {};
+
+            \draw[<-] (z) -- ++(-1,0) node[left] (y0_2) {$y^0(t)$};
+            \draw[->] (s3) -- ++(1,0) node[right] (out_2) {$y(t)$};
+            \draw[->] (z) -- (s3);
+            \draw[->] (e) -- (s3);
+            \draw[<-] (e) -- ++(0,1) node[above] {$e(t)$};
+
+            \draw[dashed] (-0.7,-0.7) rectangle ($(in_noise_1) + (0.7, 0.5)$);
+            \draw[dashed] ($(y0_2) + (0.6, -0.5)$) rectangle ($(out_2) + (-0.6, 3)$);
+
+            \node[right=1.5cm of b4] (equiv) {$\equiv$};
+            \node[below=0.001cm of equiv] {(closed-loop)};
+        \end{tikzpicture}
+    \end{figure}
+
+    In principle the \emph{ideal} desired behavior would be $y(t) = y^0(t) + \cancelto{0}{E(z)}e(t)$, but, since the system has an internal $k$-step delay and the reference $y^0(t)$ is not predictable, and since it's impossible to obtain a noise-free output ($E(z) e(t)$ is the minimum possible noise level), the \emph{optimal} result is the one found in \ref{subsec:MVC_performance}, that is:
+
+    \[
+    	y(t) = y^0(t-k) + E(z) e(t)
+    \]
+
+    \textbf{Note} Where are all the poles of the original system?
+
+    MVC \emph{pushes} all the system poles into the non-observable and/or non-controllable parts of the system (it makes internal cancellations). However,
+    this is something wanted and therefore it is not a problem since we verified it's internally asymptotically stable.
+\end{rem}
+
+\section{Main limitations of MVC}
+
+\begin{itemize}
+    \item Can be applied only to minimum-phase systems.
+    \item We cannot moderate the control/actuation effort.
+    \item We cannot design a specific behavior from $y^0(t)$ to $y(t)$.
+\end{itemize}
+
+
+\subsection{Generalized Minimum Variance Control (GMVC)}\label{subsec:GMVC}
+
+To overcome these limits there is an extension called \emph{Generalized Minimum Variance Control (GMVC)}.
+The main difference w.r.t. MVC is the extension of the performance index:
+
+\begin{align*}
+    \text{MVC: } J &= E\left[ \left(y(t) - y^0(t)\right)^2 \right] \\
+    \text{GMVC: } J &= E\left[ \left(P(z)y(t) - y^0(t) + Q(z)u(t)\right)^2 \right]
+\end{align*}
+where $P(z)$ and $Q(z)$ are \gls{tf}s designed by engineers; in particular:
+\begin{itemize}
+    \item $P(z)$ is the \emph{reference behavior} from $y^0(t)$ to $y(t)$
+    \item $Q(z) u(t)$ is used to penalize the usage of control action, which has to be moderated since it's expensive in terms of energy, power, aging, ...
+\end{itemize}
+
+\begin{rem}
+	The performance index $J$ of the MVC is just a special case of the GMVC's one, where $P(z) = 1$ and $Q(z) = 0$.
+\end{rem}
+
+\begin{rem}[Another important difference w.r.t. MVC]
+	GMVC can be applied also to non-minimum phase systems.
+\end{rem}
+
+\begin{rem}[Reference behavior $P(z)$]
+	In a generic feedback control system
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+            \node[block] at (0,1.5) (c) {Controller};
+            \node[block, right=1.5cm of c] (s) {System};
+
+            \draw[->] (c) -- (s) node[pos=0.5] {$u(t)$};
+            \draw[->] (c) -- (s);
+            \draw[<-] (c.west) -- ++(-1,0) node[left] {$y^0(t)$};
+            \draw[->] (s) -- ++(2,0) node[right] {$y(t)$};
+            \draw[->] (4.3,1.5) -- (4.3,0.5) -| (c);
+        \end{tikzpicture}
+    \end{figure}
+
+    the typical goal is to obtain the best possible tracking $y(t) = y^0(t)$. However, perfect tracking may not be the best solution: sometimes the best solution is to track a \emph{reference model} or \emph{reference behavior} from $y^0(t)$ to $y(t)$:
+    \[
+    	y(t) = P(z)y^0(t)
+    \]
+\end{rem}
+
+\begin{exa}[Cruise control in a car]
+	Cruise control is a typical example of \emph{reference model}'s usage.
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+            \node[block] at (0,1.5) (c) {Cruise Control};
+            \node[block] at (4,1.5) (s) {Car};
+
+            \draw[->] (c) -- (s) node[pos=0.5] {};
+            \draw[->] (c) -- (s) node[above, midway, align=center] {engine torque\\$u(t)$};
+            \draw[<-] (c) -- ++(-2,0) node[left, align=center] {desired\\speed\\$v^0(t)$};
+            \draw[->] (s) -- ++(2,0) node[right, align=center] {speed\\$v(t)$};
+            \draw[->] (3,1.5) -- (3,0.5) -| (c);
+        \end{tikzpicture}
+    \end{figure}
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[
+                node distance=2cm,auto,>=latex',
+                declare function={
+                    f(\x) =  (\x < 0.5) * 1 +
+                            (\x >= 0.5) * (\x < 2) * 2 +
+                            (\x >= 2) * (\x < 4) * 3 +
+                            (\x >= 4) * 1.5;
+                    f2(\x) = f(\x) - (
+                                (f(\x) - f(\x-0*0.1)) * 0.8 +
+                                (f(\x) - f(\x-1*0.1)) * 0.4 +
+                                (f(\x) - f(\x-2*0.1)) * 0.2 +
+                                (f(\x) - f(\x-3*0.1)) * 0.1
+                            ) / 1.5;
+                    f3(\x) = f(\x) - (
+                                (f(\x) - f(\x-0*0.1)) * 0.5 +
+                                (f(\x) - f(\x-1*0.1)) * 0.4 +
+                                (f(\x) - f(\x-2*0.1)) * 0.4 +
+                                (f(\x) - f(\x-3*0.1)) * 0.3 +
+                                (f(\x) - f(\x-4*0.1)) * 0.3 +
+                                (f(\x) - f(\x-5*0.1)) * 0.2 +
+                                (f(\x) - f(\x-6*0.1)) * 0.2 +
+                                (f(\x) - f(\x-7*0.1)) * 0.2 +
+                                (f(\x) - f(\x-8*0.1)) * 0.1 +
+                                (f(\x) - f(\x-9*0.1)) * 0.1 +
+                                (f(\x) - f(\x-10*0.1))* 0.05
+                            ) / 2.75;
+                }
+            ]
+            \draw[->] (0,0) -- (0,3.5) node[above] {$v(t)$};
+            \draw[->] (0,0) -- (5,0) node[below] {$t$};
+            \draw[domain=0:5,variable=\x,blue,samples=100] plot ({\x},{f(\x)}) node[right, blue] {$v^0(t)$};
+            \draw[domain=0:5,variable=\x,red,smooth,samples=50] plot ({\x},{f2(\x)});
+            \draw[domain=0:5,variable=\x,green,smooth,samples=50] plot ({\x},{f3(\x)});
+            \node[red, right]   at (3,1) {$\scriptstyle P(z) = 1$};
+            \node[green, right] at (3,0.7) {$\scriptstyle P(z)$ \footnotesize to smooth the behavior};
+        \end{tikzpicture}
+    \end{figure}
+
+    In this scenario it is intuitive that a very aggressive tracking (i.e. $P(z) = 1$) is unwanted since it is uncomfortable and dangerous. A smoother reference behavior is preferred (i.e. $P(z)$ is a low-pass filter).
+\end{exa}
+
+\appendix
+
+\begin{appendices}
+
+\chapter{Discretization of Analog Dynamical systems}\label{appendix:discr}
+Discretization allows us to do modeling, SW-sensing and control in a digital context. 
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block,ellipse,align=center] at (0,0) (controller) {Control\\algorithm\\(discrete time)};
+        \node[block,align=center] at (6,0) (system) {Physical\\system\\(analog)};
+        \node[block] at (3.5,0) (DAC) {D/A};
+        \node[block] at (3.5,-2) (ADC) {A/D};
+
+        \draw[->] (controller) -- (DAC);
+        \draw[->] (DAC) -- (system) node[pos=0.5] {$\scriptstyle u(t)$};
+        \draw[->] (system) -- ++(2,0) node[above] {$y(t)$};
+        \draw[->] (7.3,0) |- (ADC);
+        \draw[->] (ADC) -| (controller);
+    \end{tikzpicture}
+\end{figure}
+
+\subsection*{Analog to Digital Converter (A/D)}
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex',declare function={
+            f(\x) = sin(180*\x/3.14)+1.2;
+        }]
+        \draw[->] (0,0) -- (5,0) node[below] {$t$};
+        \draw[->] (0,0) -- (0,3) node[left] {$y(t)$};
+
+        \draw[domain=0:5,variable=\x,smooth] plot ({\x},{f(\x)});
+        \foreach \x in {0,...,9} {
+            \draw[dotted] (\x/2,0) -- (\x/2,3);
+            \draw[fill, blue] (\x/2,{round(2*f(\x/2))/2}) circle (0.03);
+        }
+        \foreach \y in {0,...,5} {
+            \draw[dotted] (0,\y/2) -- (5,\y/2);
+        }
+
+        \draw[<->] (0.5,-0.1) -- (1,-0.1) node[pos=0.5,below] {$\scriptstyle\Delta T$};
+        \node[right] at (1.05,-0.33) {\footnotesize (sampling time)};
+        \draw[<->] (-0.1,1.5) -- (-0.1,2);
+        \node[left,align=right] at (-0.1,1.75) {\footnotesize amplitude\\\footnotesize discretization \\\footnotesize step};
+    \end{tikzpicture}
+\end{figure}
+
+\begin{description}
+    \item[Time discretization] $\Delta T$ is the sampling time
+    \item[Amplitude discretization step] Number of levels used for discretization, e.g. \emph{10-bit discretization} uses $2^{10}$ levels of amplitude
+\end{description}
+
+A high-quality A/D converter has:
+\begin{itemize}
+    \item small $\Delta T$
+    \item high number of levels (e.g. 16-bits)
+\end{itemize}
+
+
+\subsection*{Digital to Analog Converter (D/A)}
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex',declare function={
+            f(\x) = sin(180*\x/3.14)+1.2;
+        }]
+        \draw[->] (0,0) -- (5,0) node[below] {$t$};
+        \draw[->] (0,0) -- (0,3) node[left] {$u(t)$};
+
+        \foreach \x in {0,...,9} {
+            \draw[dotted] (\x/2,0) -- (\x/2,3);
+            \draw[fill] (\x/2,{round(2*f(\x/2))/2}) circle (0.03);
+        }
+        \foreach \x in {0,...,8} {
+            \draw[blue] (\x/2,{round(2*f(\x/2))/2}) -- (\x/2+0.5,{round(2*f(\x/2))/2});
+            \draw[blue] (\x/2+0.5,{round(2*f(\x/2))/2}) -- (\x/2+0.5,{round(2*f(\x/2+0.5))/2});
+        }
+        \foreach \y in {0,...,5} {
+            \draw[dotted] (0,\y/2) -- (5,\y/2);
+        }
+    \end{tikzpicture}
+\end{figure}
+
+If $\Delta T$ is sufficiently small, the step-wise analog signal is very similar to a smooth analog signal (the steps become "invisible").
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex',declare function={
+            f(\x) = sin(180*\x/3.14)+1.2;
+        }]
+        \draw[->] (0,0) -- (5,0) node[below] {$t$};
+        \draw[->] (0,0) -- (0,3) node[left] {$u(t)$};
+
+        \foreach \x in {0,...,49} {
+            \draw[fill] (\x/10,{round(20*f(\x/10))/20}) circle (0.01);
+        }
+        \foreach \x in {0,...,49} {
+            \draw[blue] (\x/10,{round(20*f(\x/10))/20}) -- (\x/10+0.1,{round(20*f(\x/10))/20});
+            \draw[blue] (\x/10+0.1,{round(20*f(\x/10))/20}) -- (\x/10+0.1,{round(20*f(\x/10+0.1))/20});
+        }
+    \end{tikzpicture}
+\end{figure}
+
+\subsection*{Model of the Digital Perspective}
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block,align=center,minimum height=2cm] at (0,0) (cont) {Digital\\controller};
+        \node[block,align=center,minimum height=2cm] at (6,0) (sys) {Analog\\system};
+        \node[block] at (3,0.5) (DAC) {D/A};
+        \node[block] at (3,-0.5) (ADC) {A/D};
+
+        \draw[->] (cont.east|-DAC) -- (DAC) node[pos=0.2] (u_t) {$\scriptstyle u(t)$};
+        \draw[<-] (cont.east|-ADC) -- (ADC) node[pos=0.2] {$\scriptstyle y(t)$};
+        \draw[->] (DAC) -- (DAC-|sys.west);
+        \draw[<-] (ADC) -- (ADC-|sys.west);
+
+        \draw[dashed,red] (1.7,-1.5) rectangle (7.5,1.5) 
+        	node[above=0.3cm of u_t] {\color{red} $\Sc$:};
+        \node[below] at (4.6,-1.5) {\footnotesize System from a digital perspective};
+    \end{tikzpicture}
+\end{figure}
+
+\textbf{Note} Both $u(t)$ and $y(t)$ are digital signals.
+
+We can obtain a discrete-time model from $u(t)$ to $y(t)$ for the system \textcolor{red}{$\Sc$}
+
+\begin{figure}[H]
+	\centering 
+	\begin{tikzpicture}[node distance=2cm,auto,>=latex']
+		\node[block, red] (s) {$\Sc$};
+
+		\draw[<-] (s) --++ (-1,0) node[left] {$u(t)$}; 
+		\draw[->] (s) --++ (1,0) node[right] {$y(t)$}; 
+	\end{tikzpicture}
+\end{figure}
+
+How can we obtain that digital model? We can either:
+
+\begin{itemize}
+    \item make \gls{bb} system identification starting from measured data: directly estimate a discrete-time model
+    \item start from a physical \gls{wb} model (continuous time) that we need to discretize 
+\end{itemize}
+
+Let's analyze the latter method.
+
+\subsubsection*{Discretization of a physical \gls{wb} model}
+We introduce two methods to discretize a physical \gls{wb} model.
+
+\paragraph{1) \acrlong{ss} Transformation}
+
+The most used approach is the \emph{\acrlong{ss} Transformation}.
+
+\[
+    \Sc: \begin{cases}
+        \dot{x} = Ax + Bu \\
+        y = Cx + (Du) \\
+        \qquad \text{($A$, $B$, $C$, $D$ cont. time)}
+    \end{cases}
+    \quad
+    \xRightarrow[\Delta T]{\text{discretization}}
+    \quad
+    \Sc: \begin{cases}
+        x(t+1) = Fx(t) + Gu(t) \\
+        y(t) = Hx(t) + (Du(t)) \\
+        \qquad \text{($F$, $G$, $H$, $D$ discrete time)}
+    \end{cases}
+\]
+
+Discretization is done via these transformation formulas:
+\begin{align*}
+    F &= e^{A\Delta T} \\
+    G &= \int_0^{\Delta T} e^{A\delta}B\, d\delta \\
+    H &= C 
+\end{align*}
+
+\begin{rem}[$s$-domain] Let's recall the transformation from the $s$-domain to the $z$-domain.
+	
+	\paragraph{Poles}
+    How are the poles of the continuous time system transformed?
+
+    It can be proved that the eigenvalues (poles) follow the \emph{sampling transformation rule}.
+    \[
+        z = e^{s\Delta T} \qquad \lambda_F = e^{\lambda_A \Delta T}
+    \]
+    \vspace{-25pt}
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+            \draw[->] (-2,0) -- (2,0) node[above] {$\operatorname{Re}$};
+            \draw[->] (0,-2) -- (0,2) node[left] (im1) {$\operatorname{Im}$};
+
+            \draw[->] (4,0) -- (8,0) node[above] {$\operatorname{Re}$};
+            \draw[->] (6,-2) -- (6,2) node[left] (im2) {$\operatorname{Im}$};
+
+            \draw[dashed, red, line width=0.4mm] (0,-2) -- (0,1.9);
+            \fill[green, opacity=0.1] (-2,-2) rectangle (0,2);
+
+            \draw[dashed, red, line width=0.4mm] (6,0) circle (1cm);
+            \fill[green, opacity=0.1] (6,0) circle (1cm);
+
+            \node[align=center] at (0,2.8) {\textbf{$s$-domain}\\(cont. time)};
+            \node[align=center] at (6,2.8) {\textbf{$z$-domain}\\(discrete time)};
+
+            \draw[fill,blue] (0,0) circle (0.05);
+            \draw[fill,blue] (7,0) circle (0.05);
+
+            \draw[->] (2,1) -- (4,1) node[pos=0.5] (transf) {$z=e^{s\Delta T}$};
+
+            % \fill[green, opacity=0.1] ($(im2) + (3.2,0)$) rectangle ++(2,1) 
+            % 	node[pos=0.5, above=0.005cm, align=center, black, opacity=1] {as. stable\\part};
+            \node[below=2.4cm of transf] {\colorbox{green!10}{as. stable part}};
+        \end{tikzpicture}
+    \end{figure}
+
+    \paragraph{Zeroes}
+    How are the zeroes of the continuous time system transformed?
+
+    Unfortunately there is no simple rule like the one for the poles. We can only say:
+    \[
+        G(s) = \frac{\text{polynomial in $s$ with $h$ zeros}}{\text{polynomial in $s$ with $k$ poles}} \qquad \text{if $G(s)$ is strictly proper:  (i.e. $k > h$) }
+    \]
+    \[
+        G(z) = \frac{\text{polynomial in $z$ with $k-1$ zeros}}{\text{polynomial in $z$ with $k$ poles}} \qquad \text{$G(z)$ with relative degree 1}
+    \]
+
+    We have $k-h-1$ new zeros that are generated by the discretization.
+    They are called \emph{hidden zeros}.
+
+    Unfortunately these hidden zeros are frequently outside the unit circle, which means that $G(z)$ is not minimum phase even if $G(s)$ is minimum phase.
+
+    Therefore, we need for instance GMVC to design the control system.
+\end{rem}
+
+\paragraph{2) Time-derivative discretization}
+
+Another simple discretization technique frequently used is the discretization of time-derivative $\dot{x}$.
+
+\begin{align*}
+    \text{\textbf{Euler backward}} &\qquad \dot{x} \approx \frac{x(t)-x(t-1)}{\Delta T} = \frac{x(t)-z^{-1}x(t)}{\Delta T} = \frac{z-1}{z\Delta T} x(t) \\
+    \text{\textbf{Euler forward}} &\qquad \dot{x} \approx \frac{x(t+1)-x(t)}{\Delta T} = \frac{zx(t)-x(t)}{\Delta T} = \frac{z-1}{\Delta T} x(t)
+\end{align*}
+
+General formula
+\[
+    \dot{x}(t) \approx \left[ \frac{z-1}{\Delta T} \frac{1}{\alpha z + (1-\alpha)} \right]x(t) \qquad \text{with } 0 \le \alpha \le 1
+\]
+We consider three special cases:
+\begin{itemize}
+    \item if $\alpha = 0$ it's Euler forward
+    \item if $\alpha = 1$ it's Euler backward
+    \item if $\alpha = \frac{1}{2}$ it's Tustin method (mostly used)
+\end{itemize}
+
+\subsection*{Choice of the sampling time}
+
+The critical choice is $\Delta T$ (sampling time).
+
+\paragraph{Simple idea}
+The general intuitive rule is: the smaller $\Delta T$, the better.
+
+\begin{rem}[Sampling and Nyquist frequency] If $\Delta T$ is sampling time, then
+    \paragraph{Notation} 
+    \[
+        f_S = \frac{1}{\Delta T} [\text{Hz}] \qquad \omega_S = \frac{2\pi}{\Delta T} [\text{rad/s}] \qquad f_N = \frac{1}{2} f_S [\text{Hz}] \qquad \omega_N = \frac{1}{2} \omega_S [\text{rad/s}]
+    \]
+    where $f_S$ is the \emph{sampling frequency} and $f_N$ is the \emph{Nyquist frequency}.
+
+    \paragraph{Spectrum of a discretized signal}
+    For a discretized signal its spectrum is limited and it ranges over $[0, \omega_N]$; as a result, the fitting between the spectrum of the discretized signal and the one of the original signal is very close at low frequencies and becomes more inaccurate as we approach $\omega_N$.
+\end{rem}
+
+If $\Delta T$ is large, $f_S$ is small (and accordingly,  $f_N$ too):
+\vspace{-5pt}
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex',declare function={
+        a(\x) = (\x-0.5)^2+2;
+        b(\x) = -(\x-1)^2+2.125;
+        c(\x) = 2*(\x-2.5)^2+0.625;
+        d(\x) = -(\x-3)^2+0.794;
+        f(\x) = e^(-\x+2.9517);
+        z(\x) = (\x < 0.5) * 2 +
+                (0.5 <= \x) * (\x < 0.75) * a(\x) +
+                (0.75 <= \x) * (\x < 2) * b(\x) +
+                (2 <= \x) * (\x < 2.64) * c(\x) +
+                (2.64 <= \x) * (\x < 3.35) * d(\x) +
+                (3.35 <= \x) * f(\x);
+        z2(\x) = z(\x) * (\x < 1) + z(\x) * (\x >= 1) * (\x < 2.5) * (1-(\x-1)^2/1.5/4);
+    }]
+        \draw[->] (0,0) -- (5.5,0) node[below] {$\omega$};
+        \draw[->] (0,0) -- (0,3) node[left] {$|G|$};
+        \draw[samples=50,domain=0:5,variable=\x,smooth] plot ({\x},{z(\x)});
+        \draw[samples=50,red,domain=0:2.5,variable=\x,smooth] plot ({\x},{z2(\x)});
+        \draw[dotted] (2.5,0.625) -- (2.5,-0.1) node[below] {$\scriptstyle \omega_N$};
+        \draw (3.5,0.1) -- (3.5,-0.1) node[below] {$\scriptstyle \omega_S$};
+        \draw (3.5,0) edge[bend right=30,->, dashed] (2.5,0);
+        \node at (3,0.32) {$\scriptstyle \times \frac{1}{2}$};
+
+        \node[right] at (3,2) {\footnotesize Original signal (cont. time)};
+        \node[right,red] at (3,1.7) {\footnotesize Discretized signal (discr. time)};
+    \end{tikzpicture}
+\end{figure}
+
+Otherwise, if $\Delta T$ is smaller, $\omega_S$ (and $\omega_N$ accordingly) is larger:
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex',declare function={
+        a(\x) = (\x-0.5)^2+2;
+        b(\x) = -(\x-1)^2+2.125;
+        c(\x) = 2*(\x-2.5)^2+0.625;
+        d(\x) = -(\x-3)^2+0.794;
+        f(\x) = e^(-\x+2.9517);
+        z(\x) = (\x < 0.5) * 2 +
+                (0.5 <= \x) * (\x < 0.75) * a(\x) +
+                (0.75 <= \x) * (\x < 2) * b(\x) +
+                (2 <= \x) * (\x < 2.64) * c(\x) +
+                (2.64 <= \x) * (\x < 3.35) * d(\x) +
+                (3.35 <= \x) * f(\x);
+        z2(\x) = z(\x) * (\x < 2.5) + z(\x) * (\x >= 2.5) * (\x < 4) * (1-(\x-2.5)^2/1.5/4);
+    }]
+        \draw[->] (0,0) -- (5.5,0) node[below] {$\omega$};
+        \draw[->] (0,0) -- (0,3) node[left] {$|G|$};
+        \draw[samples=50,domain=0:5,variable=\x,smooth] plot ({\x},{z(\x)});
+        \draw[samples=50,red,domain=0:4,variable=\x,smooth] plot ({\x},{z2(\x)});
+
+        \draw[dotted] (4,0.35) -- (4,-0.1) node[below] {$\scriptstyle \omega_N$};
+        \draw (4.8,0.1) -- (4.8,-0.1) node[below] {$\scriptstyle \omega_S$};
+        \draw (4.8,0) edge[bend right=30,->, dashed] (4,0);
+
+        \node[right] at (3,2) {\footnotesize Original signal (cont. time)};
+        \node[right,red] at (3,1.7) {\footnotesize Discretized signal (discr. time)};
+    \end{tikzpicture}
+\end{figure}
+
+As we can see from the two graphs, in both cases we have approximations of the original spectrum but in the latter case this approximation is valid over a larger \emph{bandwidth}.
+
+\paragraph{Drawbacks} Hidden problems of a too-small $\Delta T$:
+\begin{itemize}
+    \item sampling devices (A/D and D/A) cost
+    \item computational cost (e.g. updating an algorithm every $1\,\mu\text{s}$ is much heavier than every $1\,\text{ms}$)
+    \item cost of memory (if data storing is needed)
+    \item numerical precision cost 
+\end{itemize}
+
+The last one is the most critical and it is really a hidden problem.
+
+\subparagraph{Numerical precision cost}
+Let's make a numerical example to understand it.
+
+Consider a continuous time \gls{tf} with two asymptotically stable poles (eigenvalues) in $s=-3$ and $s=-2$. These can be mapped in the $z$-domain using the formula $\lambda_F=e^{\lambda_A\Delta T}$.
+If $\Delta T$ is very small (tends to zero), we squeeze all the poles very close to the point $(1,0)$.
+
+Graphically:
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \draw[->] (-2,0) -- (2,0) node[above] {$\operatorname{Re}$};
+        \draw[->] (0,-2) -- (0,2) node[left] {$\operatorname{Im}$};
+
+        \draw[->] (4,0) -- (8,0) node[above] {$\operatorname{Re}$};
+        \draw[->] (6,-2) -- (6,2) node[left] {$\operatorname{Im}$};
+
+        \draw (6,0) circle (1cm);
+
+        \node[cross] at (-0.5,0) {};
+        \node[cross] at (-1,0) {};
+
+        \draw (-0.5,0) edge[dotted, bend left=30,->] (6.99,0);
+        \draw (-1,0) edge[dotted, bend left=30,->] (6.98,0);
+        \node[right] at (7,-0.2) {$\scriptstyle 0.99995$};
+        \node[right] at (7,-0.4) {$\scriptstyle 0.99996$};
+
+        \node at (0,2.5) {\textbf{$s$-domain}};
+        \node at (6,2.5) {\textbf{$z$-domain}};
+
+        \draw[->] (2,2) -- (4,2) node[pos=0.5] {$\lambda_F=e^{\lambda_A\Delta T}$};
+    \end{tikzpicture}
+\end{figure}
+
+Therefore, we need very high numerical precision (use a lot of digits) to avoid instability.
+
+\textbf{Rule of thumb} of control engineers: $f_S$ is between 10 and 20 times the system bandwidth we are interested in.
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex',declare function={
+        a(\x) = (\x-0.5)^2+2;
+        b(\x) = -(\x-1)^2+2.125;
+        c(\x) = 2*(\x-2.5)^2+0.625;
+        d(\x) = -(\x-3)^2+0.794;
+        f(\x) = e^(-\x+2.9517);
+        z(\x) = (\x < 0.5) * 2 +
+                (0.5 <= \x) * (\x < 0.75) * a(\x) +
+                (0.75 <= \x) * (\x < 2) * b(\x) +
+                (2 <= \x) * (\x < 2.64) * c(\x) +
+                (2.64 <= \x) * (\x < 3.35) * d(\x) +
+                (3.35 <= \x) * f(\x);
+    }]
+        \draw[->] (0,0) -- (5.5,0) node[below] {$f$};
+        \draw[->] (0,0) -- (0,3) node[left] {$|G|$};
+        \draw[samples=50,domain=0:5,variable=\x,smooth] plot ({\x},{z(\x)});
+
+        \draw[dotted] (1.5,1.875) -- (1.5,0) node[below] {};
+        \node[below, align=center] at (0.75,0) {\footnotesize bandwidth\\ \footnotesize of interest};
+        \draw (4,0.1) -- (4,-0.1) node[below] {$\scriptstyle f_N$};
+        \draw (1.5,0) edge[bend left=20,->, dashed] (4,0);
+        \node at (2.75,0.45) {$\scriptstyle \times 10$};
+    \end{tikzpicture}
+\end{figure}
+
+Usually, the bandwidth of interest is the one of the closed-loop control system.
+
+
+\paragraph{Aliasing problem}
+Another problem is managing the \emph{aliasing} problem, which is a big and critical problem in the A/D step. 
+
+\begin{thm}[Nyquist–Shannon sampling theorem]\label{th:shannon}
+    The maximum frequency content of a signal $f_{MAX}$ to be sampled should be such that $f_{MAX} \le f_N$.
+\end{thm}
+
+
+When we want to measure a signal $x$ we also capture the measurement noise; indeed, what we really obtain is $\tilde{x}(t) = x(t) + e(t)$, where $e$ is the noise.
+Hence, the spectrum of the measured signal will be composed of the spectrum of the original signal $x$ and the spectrum of the noise.
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex',declare function={
+        f(\x) = 5/(\x^2+2) - 0.6 ;
+        g(\x) = 10/(5*\x+3) - 1.6 ;
+        h(\x) = 0.2 * (1 - (\x - 1.1)^3/2.5) ;
+    }]
+        \draw[->] (0,0) -- (5.5,0) node[below] {$f$};
+        \draw[->] (0,0) -- (0,2.5) node[left] {$\Gamma_{\tilde{x}}$};
+        \draw[blue, domain=0:3,samples=40,variable=\x,smooth] plot ({\x},{f(\x)}) 
+            node[above, blue] {$\tilde{x}$};
+        \draw[green,domain=0:1,samples=40,variable=\x,smooth] plot ({\x},{g(\x)}) 
+            node[above, green] {$x$};
+        \draw[domain=0:3,samples=40,variable=\x,smooth] plot ({\x},{h(\x)})
+            node[right] {$e$};
+
+        \draw (2.5,0.1) -- ++(0,-0.1) node[below] {$f_{MAX}$};
+    \end{tikzpicture}
+\end{figure}
+
+Therefore, if we want to sample the measured signal, and for example, $f_{MAX} = 2 \text{KHz}$, then we need $f_N \ge 2 \text{KHz} \implies f_S \ge 4 \text{KHz}$. On the other hand, we know that the bandwidth of the original signal $x(t)$ is much smaller, since the high-frequency content of $\tilde{x}(t)$ is due to the presence of the noise.
+For example, suppose that the frequency content of $x(t)$ is contained in the range $[0, 0.5]$ KHz: we can therefore sample with an A/D that samples at $f_S = 1$ KHz.
+
+\subparagraph{Analog solution} The classical way to deal with aliasing is to use anti-alias analog filters. 
+
+\begin{rem}[Anti-Aliasing Filter (AAF)]
+    An \emph{anti-aliasing filter (AAF)} is a filter used before a signal sampler to restrict the bandwidth of a signal to satisfy the Nyquist–Shannon sampling theorem (\ref{th:shannon}) over the band of interest.
+    
+    In practice, it is an analog low-pass filter with a cut frequency $f_{cut} \le \frac{1}{2} f_S = f_N$. 
+\end{rem}
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block,align=center] at (0,0) (sys) {\textbf{analog}\\anti-alias\\low-pass filter\\$f_{cut} = 0.5$KHz};
+        \node[block] at (3,0) (ADC) {A/D};
+
+        \draw[->] (-2.5,0) -- (sys) node[pos=0.5] {$\tilde{x}(t)$};
+        \draw[->] (sys) -- (ADC) node[pos=0.5] {$\tilde{x}'(t)$};
+        \draw[->] (ADC) -- ++(1.5,0) node[pos=0.5] {$\tilde{x}''(t)$};
+    \end{tikzpicture}
+\end{figure}
+
+In this case, since $f_S = 1$ KHz we need a low-pass filter that cuts everything above $f_N = \frac{1}{2} f_S = 0.5$ KHz.
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex',declare function={
+        f(\x) = 5/(\x^2+2) - 0.6 ;
+        g(\x) = 10/(5*\x+3) - 1.6 ;
+    }]
+        \draw[->] (0,0) -- (4,0) node[below] {$f$};
+        \draw[->] (0,0) -- (0,3) node[left] {$\Gamma_{\tilde{x}}$};
+        \draw[blue, domain=0:3,samples=40,variable=\x,smooth] plot ({\x},{f(\x)}) 
+            node[above, blue] {$\tilde{x}$};
+        \draw[green,domain=0:1,samples=40,variable=\x,smooth] plot ({\x},{g(\x)}) 
+            node[above, green] {$x$};
+        \draw (2.5,0.05) -- (2.5,-0.05) node[below] {$\scriptstyle 2KHz$};
+
+        \draw[->] (5,0) -- (9,0) node[below] {$f$};
+        \draw[->] (5,0) -- (5,3) node[left] {$\Gamma_{\tilde{x}'}$};
+        \draw[blue, domain=0:1,samples=13,variable=\x,smooth] plot ({\x + 5},{f(\x)}) 
+            node[above, blue] {$\tilde{x}$};
+        \draw[green,domain=0:1,samples=13,variable=\x,smooth] plot ({\x + 5},{g(\x)}) 
+            node[below, green] {$x$};
+        \draw (6,0.05) -- (6,-0.05) node[below] {$\scriptstyle 0.5KHz$};
+        \draw[dotted] (6,0) -- (6,2.1);
+
+        % \node[align=center] at (9,2) {Nyquist generates no\\alias at 1KHz};
+    \end{tikzpicture}
+\end{figure}
+
+In this way the signal $\tilde{x}$ is ready to be sampled: aliasing is avoided ($f'_{MAX} = f_{cut} \le f_N$); furthermore, all the frequency components in the bandwidth of the original signal $x(t)$ are preserved and the ones introduced by the noise are cut-off.
+
+\subparagraph{Full digital solution}  Today the most used solution is the full digital one, in which there is no longer any need for an analog anti-alias filter.
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex',declare function={
+        f(\x) = 2.7 * (1 - \x/3) + rand^2/3;
+    }]
+        \node[block, align=center] at (0,5) (ADC) {A/D \\ $f_S=4$ KHz};
+        \node[block, align=center] at (3,5) (filter) {\textbf{digital}\\low-pass\\filter \\$f_{cut} = 0.5$ KHz};
+        \node[block, align=center] at (6.3,5) (sampl) {under-sampling\\$(1:4)$\\{$f'_S=1$ KHz}};
+
+        \draw[->] (-2,5) -- (ADC) node[pos=0.5] {$\tilde{x}(t)$};
+        \draw[->] (ADC) -- (filter);
+        \draw[->] (filter) -- (sampl);
+        \draw[->] (sampl) -- ++(2,0);
+
+        % \draw [decorate,decoration={brace,amplitude=10pt},yshift=-0.1cm] (7,4.5) -- (-0.5,4.5) node [black,midway,yshift=-0.4cm] {\footnotesize fully digital};
+    \end{tikzpicture}
+\end{figure}
+
+With this approach we consider the bandwidth of the noisy signal $\tilde x(t)$ ($f_{MAX} = 2$ KHz) and we sample it at $f_S = 2 f_{MAX} = 4$ KHz. Then we cut-off all the frequency components introduced by the noise with a digital low-pass filter ($f_{MAX} \rightarrow f'_{MAX} = 0.5$ KHz). Last, since now the bandwidth of the signal is $f'_{MAX} = 0.5$ KHz, we can re-sample the signal at $f'_S = 2 f'_{MAX} = 1$ KHz: this is \emph{under-sampling (1:4)} (because $f'_S = \frac{1}{4} f_S$) which in practice means to take one sample out of every four.
+
+\end{appendices}
\ No newline at end of file
diff --git a/lectures/2022_05_17.tex b/lectures/2022_05_17.tex
new file mode 100644
index 0000000..4178741
--- /dev/null
+++ b/lectures/2022_05_17.tex
@@ -0,0 +1,557 @@
+%!TEX root = ../main.tex
+
+\externaldocument{2022_05_02}
+
+In chapter 3 we have seen classical technology of software-sensing based on \acrlong{kf}:
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+        \node[left] at (0,4) (u) {$u(t)$};
+        \node[block] at (2,4) (sys) {$\Sc$};
+        \node[block] at (2,2.3) (k) {$\bar{K}$};
+        \node[sum] at (4,2.3) (sum) {};
+        \node[right] at (6,4) (y) {$y(t)$};
+        \node[block, align=center] at (2,1) (model) {model of \\ $\Sc$};
+        \node[above] at (2,5) (dist) {disturbances};
+
+        \draw[->] (dist) -- (sys);
+        \draw[->] (u) -- (sys);
+        \draw[->] (sys) -- (y);
+        \draw[<-,red,line width=0.4mm] (sum) -- (4,4) node[pos=0.2] {$+$};
+        \draw[->] (sum) -- (k) node[midway, above] {$e(t)$};
+        \draw[->,red,line width=0.4mm] (0.5,4) |- (model);
+        \draw[->] (k) -- (model);
+        \draw[->] (model) -| (sum) 
+        	node[pos=0.85] {$-$}
+        	node[right, pos=0.7] {$\hat{y}(t|t-1)$};
+        \draw[->,red,line width=0.4mm,transform canvas={yshift=-0.2cm}] (model) -- (6,1) node[right] {$\hat{x}(t|t)$};
+        \draw[dashed, blue] (0,0) rectangle (5,3) node[right] {$\mathcal{KF}$};
+    \end{tikzpicture}
+\end{figure}
+
+\textbf{Note} If the \gls{kf} is the asymptotic one (i.e. $K(t) = \bar{K}$), it is a MIMO LTI system. 
+
+Main features of this approach:
+\begin{itemize}
+    \item A (\acrlong{wb}/physical) model is needed.
+    \item No need (in principle) of a training dataset including measurements of the state to be estimated.
+    \item It is a feedback estimation algorithm (feedback correction of the model using estimated output error).
+    \item Constructive method (non-parametric, no optimization involved).
+    \item Can be used (in principle) to estimate states which are impossible to be measured (also at prototyping/training/design stage).
+\end{itemize}
+
+Are there other classes of software-sensing techniques? Yes, black-box approaches with \emph{learning}/\emph{training} from data (system identification).
+ 
+In this chapter we see them focusing on the architecture (we do not need new algorithms, just use something we have already studied). We will re-cast the SW-sensing problem into a system identification problem. 
+
+\section{Linear Time Invariant Systems}\label{sec:BB-SW-LTI}
+
+To find the relationship between $u(t) \rightarrow \hat{x}(t|t)$ and $y(t) \rightarrow \hat{x}(t|t)$ we can use a dataset.
+Indeed, if we have a dataset
+\begin{align*}
+    \left\{ u(1), u(2), \ldots, u(N) \right\} \\
+    \left\{ y(1), y(2), \ldots, y(N) \right\} \\
+    \left\{ x(1), x(2), \ldots, x(N) \right\}
+\end{align*}
+
+we can estimate the relationship (i.e. \gls{tf}s) between the inputs ($u(t)$ and $y(t)$) and the output ($x(t)$) without modelling the system and adopting a \gls{bb} approach instead.
+
+\textbf{Note} This is a supervised training approach; thus, only for the training phase, we need measurements of the state to be estimated (using a physical sensor that, in the \emph{production phase}, will be replaced by the trained SW-sensor).
+
+
+\paragraph{Model selection} We focus on this family of models:
+\[
+	\hat{x}(t|t) = S_{ux}(z, \theta) u(t-1) + S_{yx}(z,\theta)y(t)
+\]
+where we have at least 1-step delay on $u(t)$. The block scheme of this model is: 
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+    	\node[block] (z) {$z^{-1}$};
+        \node[block, right of=z] (ux) {$S_{ux}(z, \theta)$};
+        \node[block, below of=ux] (yx) {$S_{yx}(z, \theta)$};
+        \node[left,left of=z] (u) {$u(t)$};
+        \node[left, below of=u] (y) {$y(t)$};
+        \node[sum,right of=ux,xshift=1cm,yshift=-1cm] (sum) {};
+        \node[right, right of=sum] (x) {$\hat{x}(t|t)$};
+
+        \draw[->] (u) -- (z);
+        \draw[->] (z) -- (ux);
+        \draw[->] (y) -- (yx);
+        \draw[->] (ux) -| (sum);
+        \draw[->] (yx) -| (sum);
+        \draw[->] (sum) -- (x);
+    \end{tikzpicture}
+\end{figure}
+
+\paragraph{Performance index}
+We define the usual performance index as the \emph{sample variance of the estimation error}: 
+\[
+    J_N(\theta) = \frac{1}{N}\sum_{t=1}^N \left( x(t) - (S_{ux}(z, \theta) u(t-1) + S_{yx}(z,\theta)y(t)) \right)^2
+\]
+
+\paragraph{Optimization}
+\[
+    \hat{\theta}_N = \argmin_\theta J_N(\theta)
+\]
+
+We obtain the \acrlong{bb} software sensor $\hat{x}(t|t)$ as: 
+\[
+	\hat{x}(t|t) = S_{ux}(z, \hat{\theta}_N) u(t-1) + S_{yx}(z,\hat{\theta}_N)y(t)
+\]
+
+
+\textbf{Note} Once the SW-sensor has been designed (trained), we no longer need samples of $x(t)$.
+
+\textbf{Note} The above method is a classic \gls{bb} parametric approach (using \gls{tf}s) but the same can also be done using 4-SID algorithm.
+
+
+\section{Non-linear Systems}
+
+In case the system is non-linear, we can use the same idea used in the LTI case (see \ref{sec:BB-SW-LTI}), where we replace the asymptotic \gls{kf} with a non-linear SW-sensor (like EKF, described in \ref{subsec:KF_non-lin_ext}).
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block,dashed border,align=center] at (2,4) (n1) {$\Sc_{NL}$};
+        \node[block,double border,align=center] at (2,2) (n2) {$f_{NL}$};
+        \node[block,dashed border,align=center] at (2,0) (n3) {model of\\ $\Sc_{NL}$};
+        \node[sum] at (4,2) (sum) {};
+        \node[left] at (0,4) (u) {$u(t)$};
+        \node[left] at (6,4) (y) {$y(t)$};
+
+        \draw[dashed, blue] (0,-1) rectangle (5,3) 
+        	node[right, align=center] {non-linear\\SW-sensor};
+
+        \draw[->] (u) -- (n1);
+        \draw[->] (0.5,4) |- (n3);
+        \draw[->] (n2) -- (n3);
+        \draw[->] (sum) -- (n2);
+        \draw[->] (n1) -| (sum) node[pos=0.9] {+};
+        \draw[<-] (sum) |- (n3) node[pos=0.1] {-};
+        \draw[->] (n1) -- (y);
+        \draw[->,transform canvas={yshift=-0.3cm}] (n3) -- ++(4,0) node[right] {$\hat{x}(t|t)$};
+    \end{tikzpicture}
+\end{figure}
+
+\begin{rem}[Block scheme notation for non-linear system] 
+	Notation used from now on to represent non-linear systems:
+	\begin{figure}[H]
+	    \centering
+	    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+	        \node[block,double border,align=center] (n1) {non-lin.\\\textbf{static}\\system};
+	        \node[block,dashed border,align=center, right, right=3cm of n1](n2) {non-lin.\\\textbf{dynamic}\\system};
+	    \end{tikzpicture}
+	\end{figure}
+\end{rem}
+
+% \begin{rem}
+%     In \gls{kf} the E.K.F. extension uses the trick of a time-varying linear gain $K(t)$ but the obvious choice is a non-linear gain (static nonlinear function).
+% \end{rem}
+
+The content of the box (non-linear SW-sensor) is:
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block,dashed border,align=center] at (0,0) (n) {non-linear\\dynamic\\time invariant\\system};
+        \draw[<-,transform canvas={yshift=0.3cm}] (n) -- ++(-2,0) node[left] {$u(t)$};
+        \draw[<-,transform canvas={yshift=-0.3cm}] (n) -- ++(-2,0) node[left] {$y(t)$};
+        \draw[->] (n) -- ++(2,0) node[right] {$\hat{x}(t|t)$};
+    \end{tikzpicture}
+\end{figure}
+
+The problem is again the \gls{bb} identification of a non-linear dynamic system, starting from a measured training dataset.
+
+There are 4 (3+1) different architectures to design the non-linear SW-sensor.
+
+\paragraph{Architecture \#1} Use a \emph{Dynamical Recurrent Neural Network (Dynamical RNN)} in which we update \emph{static neurons} into \emph{dynamic neurons}.
+
+	\begin{figure}[H]
+	    \centering
+	    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+	        \node[block,dashed border,align=center] at (0,0) (n) {recurrent\\neural network};
+	        \draw[<-,transform canvas={yshift=0.3cm}] (n) -- ++(-2,0) node[left] {$u(t)$};
+	        \draw[<-,transform canvas={yshift=-0.3cm}] (n) -- ++(-2,0) node[left] {$y(t)$};
+	        \draw[->] (n) -- ++(2,0) node[right] {$\hat{x}(t|t)$};
+	    \end{tikzpicture}
+	\end{figure}
+
+If we zoom into a single neuron: 
+
+\begin{figure}[H]
+    \centering
+    \begin{minipage}[t]{0.48\textwidth}
+        \centering
+        \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+            \node[block] at (0,4) (a1) {$a_1$};
+            \node[block] at (0,3) (a2) {$a_2$};
+            \node at (0,2) {$\vdots$};
+            \node[block] at (0,1) (ah) {$a_h$};
+            \node[sum] at (2,2) (sum) {};
+            \node[block,ellipse,align=center] at (3.5,2) (nlf) {$f_{NL}$};
+
+            \draw[<-] (a1) -- ++(-1,0);
+            \draw[<-] (a2) -- ++(-1,0);
+            \draw[<-] (ah) -- ++(-1,0);
+
+            \draw[->] (a1) -- (sum);
+            \draw[->] (a2) -- (sum);
+            \draw[->] (ah) -- (sum);
+            \draw[<-] (sum) -- ++(0,0.5) node[above] {$b$};
+            \draw[->] (sum) -- (nlf);
+            \draw[->] (nlf) -- ++(1.5,0);
+        \end{tikzpicture}
+        \caption*{Static neuron (non-linear static system)}
+    \end{minipage}
+    \begin{minipage}[t]{0.48\textwidth}
+        \centering
+        \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
+            \node[block] at (0,4) (a1) {$a_1$};
+            \node[block] at (0,3) (a2) {$a_2$};
+            \node at (0,2) {$\vdots$};
+            \node[block] at (0,1) (ah) {$a_h$};
+            \node[sum] at (2,2) (sum) {};
+            \node[block,ellipse,align=center] at (3.5,2) (nlf) {$f_{NL}$};
+            \node[block, below of=nlf] (z) {$z^{-1}$};
+            \node[block, below of=sum] (c) {$c$};
+
+            \draw[<-] (a1) -- ++(-1,0);
+            \draw[<-] (a2) -- ++(-1,0);
+            \draw[<-] (ah) -- ++(-1,0);
+
+            \draw[->] (a1) -- (sum);
+            \draw[->] (a2) -- (sum);
+            \draw[->] (ah) -- (sum);
+            \draw[<-] (sum) -- ++(0,0.5) node[above] {$b$};
+            \draw[->] (sum) -- (nlf);
+            \draw[->] (nlf) -- ++(1.5,0);
+
+            \draw[->] (4.5,2) |- (z);
+            \draw[->] (z) -- (c);
+            \draw[->] (c) -- (sum);
+        \end{tikzpicture}
+        \caption*{Dynamic neuron (non-linear dynamic system)}
+    \end{minipage}
+\end{figure}
+where $f_{NL}$ is a non-linear function (e.g. sigmoid function).
+
+Using an RNN with dynamic neurons is the most general approach, but it is seldom used in practice due to stability issues and difficulties in the convergence of training.
+
+\paragraph{Architecture \#2} Split the SW-sensor into a static non-linear system and a dynamic linear system (namely, a non-recursive FIR scheme).
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block] at (1.5,0) (yn) {$z^{-1}$};
+        \node[block] at (1.5,1.5) (y2) {$z^{-1}$};
+        \node[block] at (1.5,2.5) (y1) {$z^{-1}$};
+
+        \node[block] at (1.5,4) (un) {$z^{-1}$};
+        \node[block] at (1.5,5.5) (u2) {$z^{-1}$};
+        \node[block] at (1.5,6.5) (u1) {$z^{-1}$};
+
+        \node[left] at (0,6.5) (u) {$u(t)$};
+        \node[left] at (0,3.2) (y) {$y(t)$};
+
+        \node[block,minimum height=7cm,double border,align=center] at (5.25,3.25) (sys) {non-linear\\static\\parametric\\function\\$f_{NL}(\cdot; \theta)$ \vspace{10pt} \\ (e.g. static\\neural\\network)};
+
+        \draw[->] (u) -- (u1);
+        \draw[->] (u1) -- (u2);
+        \draw[->,dotted] (u2) -- (un);
+        \draw[->] (u1) -- (u1-|sys.west) node[pos=0.5] {$u(t-1)$};
+        \draw[->] (u2) -- (u2-|sys.west) node[pos=0.5] {$u(t-2)$};
+        \draw[->] (un) -- (un-|sys.west) node[pos=0.5] {$u(t-n_u)$};
+
+        \draw[->] (y) -- (y-|sys.west);
+        \draw[->] (1.5,3.2) -- (y1);
+        \draw[->] (y1) -- (y2);
+        \draw[->,dotted] (y2) -- (yn);
+        \draw[->] (y1) -- (y1-|sys.west) node[pos=0.5] {$y(t-1)$};
+        \draw[->] (y2) -- (y2-|sys.west) node[pos=0.5] {$y(t-2)$};
+        \draw[->] (yn) -- (yn-|sys.west) node[pos=0.5] {$y(t-n_y)$};
+
+        \draw[->] (sys) -- ++(2,0) node[right] {$\hat{x}(t|t)$};
+
+        \draw[decoration={brace}, decorate] (3.5,-0.7) node {} -- (0,-0.7);
+        \node[align=center,below] at (1.75,-0.9) {linear dynamic\\system};
+
+        \draw[decoration={brace}, decorate] (7,-0.7) node {} -- (3.6,-0.7);
+        \node[align=center,below] at (5.25,-0.9) {non-linear static\\system to be\\estimated};
+    \end{tikzpicture}
+\end{figure}
+
+\begin{rem}[Pros]
+	Some pros about this architecture:
+	\begin{itemize}
+		\item Training (supervised) is performed only on the non-linear static part of the SW-sensor (much simpler than the estimation of an RNN).
+		\item Stability is guaranteed by construction, since it is a non-recursive FIR architecture (there is no feedback loop).
+	\end{itemize}	
+\end{rem}
+
+\begin{rem}[Cons]
+    In case of a MIMO system with
+    \begin{align*}
+        m \text{ inputs: } u(t) = \begin{bmatrix}
+            u_1(t) \\
+            \vdots \\
+            u_m(t)
+        \end{bmatrix} \quad p \text{ outputs: } y(t) = \begin{bmatrix}
+            y_1(t) \\
+            \vdots \\
+            y_p(t)
+        \end{bmatrix} \quad n \text{ states: } x(t) = \begin{bmatrix}
+            x_1(t) \\
+            \vdots \\
+            x_n(t)
+        \end{bmatrix}
+    \end{align*}
+
+    the estimation problem is the search of the optimal parameter vector $\theta$ for the function
+    \[
+        f(\cdot; \theta): \RR^{m\times n_u + p \times (n_y + 1)} \rightarrow \RR^n
+    \]    
+    Therefore, the I/O size of this non-linear parametric function can be very large.
+\end{rem}
+
+\paragraph{Architecture \#3} Like architecture \#2, we split the SW-sensor into a static non-linear system and a linear dynamic system but, this time, with a recursive IIR scheme.
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block] at (1.5,0) (yn) {$z^{-1}$};
+        \node[block] at (1.5,1.5) (y2) {$z^{-1}$};
+        \node[block] at (1.5,2.5) (y1) {$z^{-1}$};
+
+        \node[block] at (1.5,4) (un) {$z^{-1}$};
+        \node[block] at (1.5,5.5) (u2) {$z^{-1}$};
+        \node[block] at (1.5,6.5) (u1) {$z^{-1}$};
+
+        \node[block] at (1.5,8) (xn) {$z^{-1}$};
+        \node[block] at (1.5,9.5) (x2) {$z^{-1}$};
+        \node[block] at (1.5,10.5) (x1) {$z^{-1}$};
+
+        \node[left] at (0,10.5) (x) {$x(t)$};
+        \node[left] at (0,6.5) (u) {$u(t)$};
+        \node[left] at (0,3.2) (y) {$y(t)$};
+
+        \node[block,minimum height=11cm,minimum width=1.5cm,double border,align=center] at (5.25,5.25) (sys) {non-linear\\static\\parametric\\function\\$f_{NL}(\cdot; \theta)$};
+
+        \draw[->] (u) -- (u1);
+        \draw[->] (u1) -- (u2);
+        \draw[->,dotted] (u2) -- (un);
+        \draw[->] (u1) -- (u1-|sys.west) node[pos=0.5] {$u(t-1)$};
+        \draw[->] (u2) -- (u2-|sys.west) node[pos=0.5] {$u(t-2)$};
+        \draw[->] (un) -- (un-|sys.west) node[pos=0.5] {$u(t-n_u)$};
+
+        \draw[->] (y) -- (y-|sys.west);
+        \draw[->] (1.5,3.2) -- (y1);
+        \draw[->] (y1) -- (y2);
+        \draw[->,dotted] (y2) -- (yn);
+        \draw[->] (y1) -- (y1-|sys.west) node[pos=0.5] {$y(t-1)$};
+        \draw[->] (y2) -- (y2-|sys.west) node[pos=0.5] {$y(t-2)$};
+        \draw[->] (yn) -- (yn-|sys.west) node[pos=0.5] {$y(t-n_y)$};
+
+        \draw[->] (x) -- (x1);
+        \draw[->] (x1) -- (x2);
+        \draw[->,dotted] (x2) -- (xn);
+        \draw[->] (x1) -- (x1-|sys.west) node[pos=0.5] {$x(t-1)$};
+        \draw[->] (x2) -- (x2-|sys.west) node[pos=0.5] {$x(t-2)$};
+        \draw[->] (xn) -- (xn-|sys.west) node[pos=0.5] {$x(t-n_x)$};
+
+        \draw[->] (sys) -- ++(2,0) node[right] {$\hat{x}(t|t)$};
+
+    \end{tikzpicture}
+    \caption*{Architecture \#3 during training phase}
+\end{figure}
+
+\begin{rem}[Pros \& Cons]
+	The advantage is that in an IIR architecture $n_u$ and $n_y$ are usually much smaller (thanks to recursion), which means a lower computational effort.
+
+	As for the disadvantages, notice that $x(t)$ data from a physical sensor are used only in the training phase; then, in production, the recursion comes into play, since we need to feed the output $\hat{x}(t|t)$ back into the recursive part: this feedback can cause instability.
+
+	\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block] at (1.5,0) (yn) {$z^{-1}$};
+        \node[block] at (1.5,1.5) (y2) {$z^{-1}$};
+        \node[block] at (1.5,2.5) (y1) {$z^{-1}$};
+
+        \node[block] at (1.5,4) (un) {$z^{-1}$};
+        \node[block] at (1.5,5.5) (u2) {$z^{-1}$};
+        \node[block] at (1.5,6.5) (u1) {$z^{-1}$};
+
+        \node[block] at (1.5,8) (xn) {$z^{-1}$};
+        \node[block] at (1.5,9.5) (x2) {$z^{-1}$};
+        \node[block] at (1.5,10.5) (x1) {$z^{-1}$};
+
+        \node[left] at (0,10.5) (x) {};
+        \node[left] at (0,6.5) (u) {$u(t)$};
+        \node[left] at (0,3.2) (y) {$y(t)$};
+
+        \node[block,minimum height=11cm,minimum width=1.5cm,double border,align=center] at (5.7,5.25) (sys) {non-linear\\static\\parametric\\function\\$f_{NL}(\cdot; \theta)$};
+
+        \draw[->] (u) -- (u1);
+        \draw[->] (u1) -- (u2);
+        \draw[->,dotted] (u2) -- (un);
+        \draw[->] (u1) -- (u1-|sys.west) node[pos=0.5] {$u(t-1)$};
+        \draw[->] (u2) -- (u2-|sys.west) node[pos=0.5] {$u(t-2)$};
+        \draw[->] (un) -- (un-|sys.west) node[pos=0.5] {$u(t-n_u)$};
+
+        \draw[->] (y) -- (y-|sys.west);
+        \draw[->] (1.5,3.2) -- (y1);
+        \draw[->] (y1) -- (y2);
+        \draw[->,dotted] (y2) -- (yn);
+        \draw[->] (y1) -- (y1-|sys.west) node[pos=0.5] {$y(t-1)$};
+        \draw[->] (y2) -- (y2-|sys.west) node[pos=0.5] {$y(t-2)$};
+        \draw[->] (yn) -- (yn-|sys.west) node[pos=0.5] {$y(t-n_y)$};
+
+        \draw[->] (x) -- (x1);
+        \draw[->] (x1) -- (x2);
+        \draw[->,dotted] (x2) -- (xn);
+        \draw[->] (x1) -- (x1-|sys.west) node[pos=0.5] {$\hat{x}(t-1|t-1)$};
+        \draw[->] (x2) -- (x2-|sys.west) node[pos=0.5] {$\hat{x}(t-2|t-2)$};
+        \draw[->] (xn) -- (xn-|sys.west) node[pos=0.5] {$\hat{x}(t-n_x|t-n_x)$};
+
+        \draw (sys.east) -| ($(x) + (7.5,1.5)$) node[midway, right] {$\hat{x}(t|t)$};
+
+        \draw ($(x) + (7.5,1.5)$) -- ($(x) + (0,1.5)$);
+
+        \draw[->] ($(x) + (0,1.5)$) |- (x1.west) 
+        	node[midway, left] {$\hat{x}(t|t)$};
+
+        \draw[dashed, blue] (0.7,7.2) rectangle (4.6,11.3) node[left,above] {recursive part};
+    \end{tikzpicture}
+    \caption*{Architecture \#3 during production phase}
+\end{figure}
+\end{rem}
+
+\paragraph{Architecture \#4} Modification with a-priori signal processing of architectures \#1, \#2 and \#3. The idea is to split the SW-sensor in two stages:
+\begin{itemize}
+	\item first stage: from $u(t)$ and $y(t)$, $h$ \emph{regressors} $r_i(t)$ are produced starting from physical knowledge of the system (where $h$ is much smaller than the number of $u(t)$ and $y(t)$ signals)
+	\item second stage: SW-sensor (it can be either linear or non-linear, and either a static or a dynamic system) to be first trained and then used in production.
+\end{itemize}
+
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block, dashed border, minimum width=1.5cm, minimum height=3cm, align=center] at (0,0) (sys) {pre-processing\\filter};
+        \node[block, dashed border, minimum height=3cm] at (4,0) (f) {$f(\cdot;\theta)$};
+
+        \draw[<-,transform canvas={yshift=0.5cm}] (sys) -- ++(-2cm,0) node[left] {$u(t)$};
+        \draw[<-,transform canvas={yshift=-0.5cm}] (sys) -- ++(-2cm,0) node[left] {$y(t)$};
+
+        \draw[->,transform canvas={yshift=1.2cm}] (sys) -- (f) node[pos=0.5] {$r_1(t)$};
+        \draw[->,transform canvas={yshift=0.6cm}] (sys) -- (f) node[pos=0.5] {$r_2(t)$};
+        \draw[->,transform canvas={yshift=-1.2cm}] (sys) -- (f) node[pos=0.5] {$r_h(t)$};
+        \node at (2,0) {$\vdots$};
+        \draw[->] (f) -- ++(2cm,0) node[right] {$\hat{x}(t|t)$};
+    \end{tikzpicture}
+\end{figure}
+
+
+The idea is to facilitate the estimation of $f(\cdot; \theta)$ by presenting at its input a smaller and more meaningful set of signals (regressors). In this way the \gls{bb} model identification is much simpler.
+
+\paragraph{Conclusions} In case of \gls{bb} SW-sensing with non-linear systems the problem can be quite complex.
+Using a \emph{brute-force} approach (1 dynamic neural network) is usually doomed to failure.
+The best approach is to gain some insight into the system and build some \emph{smart} regressors before the black-box map.
+
+\section{Comparison between \gls{kf} and \gls{bb} software sensing}
+
+\begin{table}[htpb]
+    \centering
+    \bgroup
+    \def\arraystretch{1.5}
+    \begin{tabular}{l|c|c}
+        & \textbf{\gls{kf}} & \textbf{\gls{bb}} \\
+        \hline\hline 
+        Need of (\gls{wb}) physical model of the system & \color{red} Yes & \color{green} No \\ \hline 
+        Need of a training dataset & \color{green} No {\color{black} \footnote{In practice some tuning through data is needed.}} & \color{red} Yes \\ \hline 
+        Interpretability of the obtained SW-sensor & \color{green} Yes & \color{red} No \\ \hline 
+        Easy retuning for a similar (different) system & \color{green} Yes & \color{red} No \\ \hline 
+        Accuracy of the obtained SW-sensor & \color{green} Good & \color{green} Very Good \\ \hline 
+        Can be used also in case of un-measurable states & \color{green} Yes & \color{red} No \\ \hline\hline 
+    \end{tabular}
+    \egroup
+\end{table}
+\FloatBarrier
+
+\begin{exa}[Example \ref{ex:KF_full-proc} continued]
+    Model (key equation) of the system:
+    \[
+        M\ddot{z} = -c(t)(\dot{z}-\dot{z}_d) - K(z-z_d)
+    \]
+
+    Measurable input $\ddot{z}$ with an accelerometer, $z-z_d$ measurable output with elongation sensor.
+    We want to estimate $\dot{z}$.
+
+    The change is that the damping $c(t)$ is that of a semi-active suspension: it can be electronically changed (control variable).
+
+    We can solve the problem with \gls{kf} or we can make an experiment and collect training data:
+    \begin{align*}
+        c(t)        : & \left\{ c(1), c(2), \cdots, c(N) \right\} \\
+        z(t)-z_d(t) : & \left\{ z(1)-z_d(1), z(2)-z_d(2), \cdots, z(N)-z_d(N) \right\} \\
+        \ddot{z}(t) : & \left\{ \ddot{z}(1), \ddot{z}(2), \cdots, \ddot{z}(N) \right\} \\
+        \dot{z}(t)  : & \left\{ \dot{z}(1), \dot{z}(2), \cdots, \dot{z}(N) \right\} \text{ (just for training)} \\
+    \end{align*}
+
+    % this part has been done the 18/05/2020
+
+    Back to the main equation:
+    \[
+        M\ddot{z} = -K(z-z_d)-c(t)(\dot{z}-\dot{z}_d)
+    \]
+    \[
+        \underbrace{\dot{z}}_{\text{to be estim.}} =
+        -\frac{K}{M} \underbrace{\int (z-z_d)dt}_{r_1(t)}
+        -\frac{1}{M} \underbrace{\int c(t)(\dot{z}-\dot{z}_d)dt}_{r_2(t)}
+    \]
+
+    We also consider this equation
+    \[
+        \dot{z} = \underbrace{\int \ddot{z} dt}_{r_3(t)}
+    \]
+
+    $r_1(t)$ and $r_2(t)$ are the primary regressors, directly linked to $\dot{z}(t)$. $r_3(t)$ is a secondary regressor, it can help $r_1(t)$.
+
+    Since these regressors are obtained by integration, to avoid drifting (caused by the integration of the DC components of the noise) we have to pre-filter the inputs with high-pass filters $\left(\frac{z-1}{z-a}\right)$.
+
+    \paragraph{Full filtering scheme} \phantom{lol}
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+            \draw[block, dashed border] (0.5,-0.5) rectangle ++(5.5,6);
+            \node[block, double border, minimum width=1.5cm, minimum height=6cm] at (8,2.5) (f) {$f(\cdot; \theta)$};
+            \node[left] at (0,0) (c) {$c(t)$};
+            \node[left] at (0,3) (d) {$z-z_d$};
+            \node[left] at (0,5) (z) {$\ddot{z}$};
+            \node[sum] at (2,0) (mult) {$\times$};
+            \node[block] at (2,1.5) (d1) {$\frac{z-1}{z}$};
+            \node[block] at (3.5,0) (d2) {$\frac{z-1}{z-a}$};
+            \node[block] at (5,0) (d3) {$\frac{1}{z-1}$};
+            \node[block] at (3.5,3) (d4) {$\frac{z-1}{z-a}$};
+            \node[block] at (5,3) (d5) {$\frac{1}{z-1}$};
+            \node[block] at (3.5,5) (d6) {$\frac{z-1}{z-a}$};
+            \node[block] at (5,5) (d7) {$\frac{1}{z-1}$};
+
+            \node[below] at (3,-0.7) {regressors building block};
+
+            \draw[->] (c) -- (mult);
+            \draw[->] (d1) -- (mult);
+            \draw[->] (mult) -- (d2);
+            \draw[->] (d2) -- (d3);
+            \draw[->] (z) -- (d6);
+            \draw[->] (d6) -- (d7);
+            \draw[->] (d) -- (d4);
+            \draw[->] (d4) -- (d5);
+            \draw[->] (d) -| (d1);
+
+            \draw[->] (d7) -- (d7-|f.west) node[pos=0.7] {$r_3(t)$};
+            \draw[->] (d5) -- (d5-|f.west) node[pos=0.7] {$r_1(t)$};
+            \draw[->] (d3) -- (d3-|f.west) node[pos=0.7] {$r_2(t)$};
+            \draw[->] (f) -- (9.5,2.5) node[right] {$\hat{\dot{z}}$};
+        \end{tikzpicture}
+    \end{figure}
+\end{exa}
diff --git a/lectures/2022_05_23.tex b/lectures/2022_05_23.tex
new file mode 100644
index 0000000..ea09703
--- /dev/null
+++ b/lectures/2022_05_23.tex
@@ -0,0 +1,441 @@
+%!TEX root = ../main.tex
+
+\acrfull{gb} approach is something between \acrlong{wb} (where we need the physical model, like \gls{kf}) and \acrlong{bb} (machine learning, data-driven approach).
+
+We'll see two \gls{gb} approaches:
+\begin{itemize}
+    \item using \gls{kf} theory
+    \item using classical parametric framework
+\end{itemize}
+
+\section{\gls{gb} system identification using Kalman Filter}
+\acrlong{kf} is not a system identification method, it is a variable estimation approach (software-sensor, observer).
+However, we can also use it for gray-box system identification (a \emph{side benefit} of \gls{kf}).
+
+\paragraph{Problem definition} We have a model, typically built as a \gls{kf} model using first principles (mathematical equations are known):
+\[
+    \Sc: \begin{cases}
+        x(t+1) = f(x(t), u(t); \theta) + v_1(t) \\
+        y(t) = h(x(t); \theta) + v_2(t)
+    \end{cases}
+\]
+
+$f$ and $h$ are linear or non-linear functions, depending on \textbf{some unknown parameters} $\theta$ (with a physical meaning, e.g. mass, resistance, friction, \dots).
+
+The problem is to estimate $\hat{\theta}$ from a dataset.
+
+\paragraph{Problem solution}
+
+\gls{kf} solves this problem by transforming the unknown parameters into extended states: \gls{kf} performs the simultaneous estimation of $\hat{x}(t|t)$ (classic \gls{kf} problem) and of $\hat{\theta}(t)$ (parameter identification problem).
+
+\subparagraph{Trick} State extension
+
+\[
+    \Sc: \begin{cases}
+        x(t+1) = f(x(t), u(t); \theta(t)) + v_1(t) \\
+        \theta(t+1) = \theta(t) + v_\theta(t) \\
+        y(t) = h(x(t), \theta(t)) + v_2(t)
+    \end{cases}
+\]
+
+The new extended state vector is $x_E = \begin{bmatrix} x(t) \\ \theta(t) \end{bmatrix}$.
+The unknown parameters are transformed into unknown variables.
+
+\begin{rem}[New state equation for $\theta$ estimation]
+    The new equation we created
+    \[
+        \theta(t+1) = \theta(t) + v_\theta(t)
+    \]
+    is a \emph{fictitious} equation (not a physical equation).
+
+    The core dynamics is $\theta(t+1)=\theta(t)$: it is the equation of something that is constant.
+    This is exactly the nature of $\theta(t)$ which is indeed a constant vector of parameters.
+
+    We need a \emph{fictitious} noise in order to force the \acrlong{kf} to find the right value of $\theta$ (otherwise \gls{kf} probably would stay fixed on the initial condition).
+    We are telling \gls{kf} not to rely on the initial conditions.
+
+    Notice that this equation does not describe an asymptotically stable system but a simply stable one.
+    It's not a problem because \gls{kf} can deal with non-asymptotically stable systems.
+\end{rem}
+
+\subparagraph{Design choice} The critical point is, as for any \gls{kf} problem, the choice of the covariance matrix of $v_\theta(t) \sim WN(0, V_\theta)$. 
+
+We make the empirical assumption that $v_1 \perp v_\theta$ and $v_2 \perp v_\theta$ (there is no reason for $v_\theta$ to be correlated with $v_1$ and $v_2$).
+We also usually make the following strongly simplifying assumption on $V_\theta$: 
+\[
+    V_\theta = \begin{bmatrix}
+        \lambda_{1\theta}^2 & & & \\
+        & \lambda_{2\theta}^2 & & \\
+        & & \ddots & \\
+        & & & \lambda_{n_\theta\theta}^2\\
+    \end{bmatrix}
+\]
+
+where $\lambda_{1\theta}^2=\lambda_{2\theta}^2=\dots=\lambda_{n_\theta\theta}^2 = \lambda_{\theta}^2$.
+In practice this means that $v_\theta(t)$ is a vector of independent \gls{wn}s all with the same variance $\lambda_\theta^2$.
+The choice of this single parameter is made empirically: it is a design parameter. 
+
+By calling $\bar{\theta}$ the true value of $\theta$ and starting from an initial condition $\theta(0)$ (i.e. the best guess on the parameters' value), we can graphically represent the behavior of $\theta(t)$ for different choices of $\lambda_{\theta}^2$ (assuming that $\theta(t)$ is a single parameter). In particular, we consider the following extreme situations:
+\begin{itemize}
+    \item big $\lambda_\theta^2$
+    \item small $\lambda_\theta^2$
+\end{itemize}
+
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[
+            node distance=2cm,auto,>=latex',
+            declare function={
+                f1(\x) = (\x < 2) * (\x/2*(3-1)) +
+                         (\x >= 2) * (3-1) +
+                         (\x > 0.2) * rand/2.5 +
+                         1;
+                f2(\x) = (\x < 4.5) * (\x/4.5*(3-1)) +
+                         (\x >= 4.5) * (3-1) +
+                         (\x > 0.2) * rand/15 +
+                         1;
+            }
+        ]
+        \draw[->] (0,0.5) -- (6,0.5) node[below] {$t$};
+        \draw[->] (0,0.5) -- (0,3.7) node[left] {$\theta(t)$};
+
+        \node[green] at (2.3,1.5) {\footnotesize small $\lambda_\theta^2$};
+        \node[blue] at (2.3,3.7) {\footnotesize big $\lambda_\theta^2$};
+
+        \draw[dotted] (6,3) -- (0,3) node[left] {$\bar{\theta}$};
+        \draw[domain=0:5.5,smooth,variable=\x,blue,samples=70] plot ({\x},{f1(\x)});
+        \draw[domain=0:5.5,smooth,variable=\x,green,samples=70] plot ({\x},{f2(\x)});
+
+        \draw[mark=*] plot coordinates {(0,1)} node[left, align=right] {Initial\\condition};
+    \end{tikzpicture}
+    % \vspace{-10pt}
+    % \caption*{Influence of choice of $\lambda_\theta^2$}
+\end{figure}
+
+With a small value of $\lambda_\theta^2$ the convergence is slow, with small oscillations (noise) at steady-state (high trust in the initial conditions).
+With a large value of $\lambda_\theta^2$, instead, the convergence is fast but the estimate is noisy at steady-state (low trust in the initial conditions).
+
+$\lambda_\theta^2$ is selected according to the best compromise for your specific application.
+
+\textbf{Note} This method can be useful when $\theta$ is varying (e.g. aging of a resistance).
+
+\textbf{Note} This trick can work in principle with any number of unknown parameters (e.g. 3 sensors, 10 states and 20 parameters).
+In practice it works well only on a limited number of parameters (e.g. 3 sensors, 5 states and 2 parameters).
+
+\begin{exa}
+    Mass linked to the wall with a spring (stiffness $K$) and with a pulling (or pushing) force $F(t)$.
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+            \draw (0,0) -- (6,0);
+            \draw[pattern=north east lines] (0,0) rectangle (-0.5,2);
+            \draw[] (2,0) rectangle (4,1.5);
+            \node at (3,0.75) {$M$};
+            \draw[decoration={aspect=0.3, segment length=1mm, amplitude=2mm,coil},decorate] (0.5,0.75) -- (1.5,0.75);
+            \draw (0,0.75) -- (0.5,0.75);
+            \draw (1.5,0.75) -- (2,0.75);
+            \draw[->] (4,0.75) -- (5,0.75) node[right] {$F(t)$};
+            \fill[pattern=north east lines] (1.8,0) rectangle ++(2.4,-0.1) node[above right] {\footnotesize friction $c$};
+            \draw[->] (3,-0.5) -- (4,-0.5) node[right] {$x$};
+            \draw (3,-0.55) -- (3,-0.45);
+        \end{tikzpicture}
+    \end{figure}
+
+    \begin{description}
+        \item[Input] $F(t)$
+        \item[Output] Position $x(t)$ (measured with a physical sensor)
+        \item[Parameters] Realistic case: $K$ and $M$ are known (measured), but the friction $c$ is unknown
+    \end{description}
+
+    \begin{figure}[H]
+        \centering
+        \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+            \node[block,align=center] at (0,0) (s) {system\\$c = ?$};
+            \draw[<-] (s) -- ++(-1.5,0) node[left] {$F(t)$};
+            \draw[->] (s) -- ++(1.5,0) node[right] {$x(t)$};
+        \end{tikzpicture}
+    \end{figure}
+
+    \paragraph{Problem} Estimate $c$ with a \gls{kf}
+
+    Since we are using a \gls{kf}, we do not need a training dataset, but we need to model the system.
+
+    \paragraph{Step 1} Model the system
+    \[
+        \ddot{x}M = -Kx - c\dot{x} + F(t)
+    \]
+    It's a differential, continuous time linear equation.
+    It's second order so we need 2 state variables: $x_1(t) = x(t)$ and $x_2(t) = \dot{x}(t)$.
+    \[
+        \begin{cases}
+            \dot{x}_1(t) = x_2(t) \\
+            M\dot{x}_2(t) = -Kx_1(t) -cx_2(t) + F(t) \\
+            y(t) = x_1(t)
+        \end{cases}
+        \begin{cases}
+            \dot{x}_1(t) = x_2(t) \\
+            \dot{x}_2(t) = -\frac{K}{M} x_1(t) - \frac{c}{M} x_2(t) + \frac{1}{M}F(t) \\
+            y(t) = x_1(t)
+        \end{cases}
+    \]
+
+    \paragraph{Step 2} Discretization
+
+    Forward Euler (see \nameref{appendix:discr}): $\dot{x}(t) \approx \frac{x(t+1)-x(t)}{\Delta}$.
+
+    \[
+        \begin{cases}
+            \frac{x_1(t+1)-x_1(t)}{\Delta} &= x_2(t) \\
+            \frac{x_2(t+1)-x_2(t)}{\Delta} &= -\frac{K}{M} x_1(t) - \frac{c}{M} x_2(t) + \frac{1}{M}F(t) \\
+            y(t) &= x_1(t)
+        \end{cases}
+    \]
+    \[
+        \begin{cases}
+            x_1(t+1) = x_1(t) + \Delta x_2(t) \\
+            x_2(t+1) = -\frac{K\Delta}{M}x_1(t) + \left(1-\frac{{\color{red}c}\Delta}{M}\right)x_2(t) + \frac{\Delta}{M}F(t) \\
+            y(t) = x_1(t)
+        \end{cases}
+    \]
+
+    \paragraph{Step 3} State extension: transforming $c$ into a state variable and adding noises.
+    \[
+        x_3(t+1) = x_3(t) = {\color{red}c(t)}
+    \]
+    \[
+        \begin{cases}
+            x_1(t+1) = x_1(t) + \Delta x_2(t) + v_{11}(t) \\
+            x_2(t+1) = -\frac{K\Delta}{M}x_1(t) + \left(1-\frac{{\color{red}x_3(t)} \Delta }{M}\right)x_2(t) + \frac{\Delta}{M}F(t) + v_{12}(t) \\
+            {\color{red}x_3(t+1) = x_3(t) + v_{13}(t)} \\
+            y(t) = x_1(t) + v_2(t)
+        \end{cases}
+    \]
+
+    The system is ready for \gls{kf} application: we get at the same time $\hat{x}(t)$ and $\hat{c}(t)$.
+
+    \textbf{Note} We need the Extended Kalman Filter: even if the original system was linear, state extension turned it into a non-linear system (due to the multiplication of $x_3(t)$ and $x_2(t)$ in the second state equation). This happens most of the time when doing state extension.
+\end{exa}
+
+
+\section{\gls{gb} system identification using Simulation Error Method}
+
+Are there alternative ways to solve \acrlong{gb} system identification problems?
+A commonly used (and intuitive) method is the parametric identification approach based on \emph{\gls{sem}}.
+
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block, align=center] at (0,0) (sys) {Physical model\\of $\Sc$\\with some\\unknown parameters ($\theta$)};
+        \draw[<-] (sys.west) -- ++(-1,0) node[left] {$u(t)$};
+        \draw[->] (sys.east) -- ++(1,0) node[right] {$y(t)$};
+    \end{tikzpicture}
+\end{figure}
+
+We follow the classical 4 steps of a supervised parametric method for system identification:
+
+\paragraph{Step 1} Collect data from an experiment
+
+\begin{align*}
+    \{ \tilde{u}(1), \tilde{u}(2), \dots, \tilde{u}(N) \} \\
+    \{ \tilde{y}(1), \tilde{y}(2), \dots, \tilde{y}(N) \}
+\end{align*}
+
+\paragraph{Step 2} Define model structure
+\[
+    y(t) = \mathcal{M}(u(t); \bar{\theta}, \theta)
+\]
+Mathematical model (linear or non-linear) usually written from first principle equations. $\bar{\theta}$ is the set of \textbf{known parameters} (mass, resistance, \dots), $\theta$ is the set of \textbf{unknown parameters} (possibly with bounds).
+
+\paragraph{Step 3} Performance index definition (based on \emph{simulation error})
+\[
+    J_N(\theta) = \frac{1}{N} \sum_{t=1}^N \left( \tilde{y}(t) - \mathcal{M}(\tilde{u}(t); \bar{\theta}, \theta) \right)^2
+\]
+where $\tilde{y}(t)$ is the measured output and $\mathcal{M}(\tilde{u}(t); \bar{\theta}, \theta)$ is the simulated output.
+
+Therefore, $J_N(\theta)$ is the \emph{sample variance of the simulated output error}.
+
+\paragraph{Step 4} Optimization
+
+\[
+    \hat{\theta}_N = \argmin_\theta J_N(\theta)
+\]
+
+Notes about the optimization step:
+\begin{itemize}
+    \item usually no analytic expression of $J_N(\theta)$ is available
+    \item each computation of $J_N(\theta)$ requires an entire simulation of the model from $t=1$ to $t=N$
+    \item usually $J_N(\theta)$ is a non-quadratic and non-convex function. Iterative and randomized optimization methods must be used
+    \item therefore, it's intuitive but very computationally demanding
+\end{itemize}
+
+\subsection{Comparison of SEM with \gls{pem}}
+We can represent what we've just seen with the following scheme:
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \draw (0,2) rectangle ++(2,2);
+        \node[align=center] at (1,3) {True system\\$\Sc$};
+        \node[draw, ellipse, align=center] at (1,0) (m) {Simulator\\$\mathcal{M}(\theta)$};
+        \node[sum] at (4,0) (sum) {};
+        \node[block] at (4,-1.5) (J) {$J_N(\theta)$};
+
+        \draw[<-] (m) -- (-1,0) node[left] {$\tilde{u}(t)$};
+        \draw[->] (m) -- (sum) node[pos=0.9] {$+$} node[pos=0.5] {$\hat{y}(t; \theta)$};
+        \draw[->] (4,3) -- (sum) node[pos=0.9] {$-$};
+        \draw[->] (sum) -- (J) node[pos=0.5, right] {\footnotesize simulation error};
+        \draw[->] (J) -| (m);
+
+        \draw[<-] (0,3) -- (-1,3) node[left] {$\tilde{u}(t)$};
+        \draw[->] (2,3) -- (5,3) node[right] {$\tilde{y}(t)$};
+    \end{tikzpicture}
+\end{figure}
+
+In the \gls{bb} approach, we have seen something very similar but:
+\begin{itemize}
+    \item the model was \gls{bb}
+    \item the performance index was based on \emph{prediction error} (\gls{pem}).
+\end{itemize}    
+
+The general framework is similar, but with \gls{sem} we need the model of the system a-priori.
+\begin{exa}
+    We collect data $\{ \tilde{u}(1), \tilde{u}(2), \dots, \tilde{u}(N) \}$ and $\{ \tilde{y}(1), \tilde{y}(2), \dots, \tilde{y}(N) \}$, we want to estimate from data the I/O model.
+
+    \[
+        y(t) = \frac{b_0 + b_1z^{-1}}{1+a_1z^{-1} + a_2z^{-2}}u(t-1) \qquad \theta = \begin{bmatrix}
+            a_1 \\ a_2 \\ b_0 \\ b_1
+        \end{bmatrix}
+    \]
+
+    In time domain $y(t) = -a_1y(t-1)-a_2y(t-2)+b_0u(t-1)+b_1u(t-2)$.
+
+    Using \gls{pem}
+    \[
+        \hat{y}(t|t-1) = -a_1\tilde{y}(t-1)-a_2\tilde{y}(t-2)+b_0\tilde{u}(t-1)+b_1\tilde{u}(t-2)
+    \]
+    \begin{align*}
+        J_N(\theta) &= \frac{1}{N}\sum_{t=1}^N \left( \tilde{y}(t) - \hat{y}(t|t-1, \theta) \right)^2 \\
+        &= \frac{1}{N}\sum_{t=1}^N \left( \tilde{y}(t) +a_1\tilde{y}(t-1)+a_2\tilde{y}(t-2)-b_0\tilde{u}(t-1)-b_1\tilde{u}(t-2) \right)^2 \\
+    \end{align*}
+
+    Notice that it's a quadratic function of $\theta$.
+
+    \begin{figure}[H]
+        \begin{minipage}[t]{0.5\textwidth}
+            \centering
+            \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+                \node[block] at (1,1) (zu) {$z^{-1}$};
+                \node[block] at (1,3.5) (zy) {$z^{-1}$};
+
+                \node at (0,2) (u) {$\tilde{u}(t)$};
+                \node at (0,4.5) (y) {$\tilde{y}(t)$};
+
+                \node[block,minimum height=5cm,minimum width=1.5cm,align=center] at (3.5,2.5) (sys) {Linear\\function\\of $\theta$};
+
+                \draw[->] (zu) -- (zu-|sys.west) node[pos=0.5] {$\scriptstyle\tilde{u}(t-1)$};
+                \draw[->] (u) -- (u-|sys.west);
+                \draw[->] (1,2) -- (zu);
+                \draw[->] (zy) -- (zy-|sys.west) node[pos=0.5] {$\scriptstyle\tilde{y}(t-1)$};
+                \draw[->] (y) -- (y-|sys.west);
+                \draw[->] (1,4.5) -- (zy);
+                \draw[->] (sys) -- ++(2,0) node[above] {$\scriptstyle\hat{y}(t|t-1)$};
+            \end{tikzpicture}
+            \caption*{\gls{pem}}
+        \end{minipage}
+        \begin{minipage}[t]{0.4\textwidth}
+            \centering
+            \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
+                \node[block] at (1,1) (zu) {$z^{-1}$};
+                \node[block] at (1,3) (zy) {$z^{-1}$};
+                \node[block] at (1,4.5) (zy2) {$z^{-1}$};
+
+                \node at (0,2) (u) {$\tilde{u}(t)$};
+
+                \node[block,minimum height=5cm,minimum width=1.5cm,align=center] at (3.5,2.5) (sys) {Linear\\function\\of $\theta$};
+
+                \draw[->] (zu) -- (zu-|sys.west) node[pos=0.5] {$\scriptstyle\tilde{u}(t-1)$};
+                \draw[->] (u) -- (u-|sys.west);
+                \draw[->] (1,2) -- (zu);
+
+                \draw[->] (zy) -- (zy-|sys.west) node[pos=0.5] {$\scriptstyle\hat{y}(t-2)$};
+                \draw[->] (zy2) -- (zy2-|sys.west) node[pos=0.5] {$\scriptstyle\hat{y}(t-1)$};
+                \draw[->] (zy2) -- (zy);
+                \draw[->] (4.8,2.5) -- (4.8,5.5) -- (1,5.5) -- (zy2);
+
+                \draw[->] (sys) -- ++(2,0) node[above] {$\scriptstyle\hat{y}(t|t-1)$};
+            \end{tikzpicture}
+            \caption*{\gls{sem}}
+        \end{minipage}
+    \end{figure}
+
+    Using \gls{sem}
+    \[
+        \hat{y}(t|t-1) = -a_1\hat{y}(t-1)-a_2\hat{y}(t-2)+b_0\tilde{u}(t-1)+b_1\tilde{u}(t-2)
+    \]
+    \begin{align*}
+        J_N(\theta) &= \frac{1}{N}\sum_{t=1}^N \left( \tilde{y}(t) - \hat{y}(t|t-1, \theta) \right)^2 \\
+        &= \frac{1}{N}\sum_{t=1}^N \left( \tilde{y}(t) +a_1\hat{y}(t-1)+a_2\hat{y}(t-2)-b_0\tilde{u}(t-1)-b_1\tilde{u}(t-2) \right)^2 \\
+    \end{align*}
+
+    Notice that it's non-linear with respect to $\theta$.
+\end{exa}
+
+\gls{pem} approach looks much better, but do not forget the noise! \gls{pem} is much less robust w.r.t. noise: we must include a model of the noise in the estimated model.
+We use \gls{armax} models.
+
+If we use \gls{arx} models:
+\[
+    y(t) = \frac{b_0+b_1z^{-1}}{1+a_1z^{-1}+a_2z^{-2}}u(t-1) + \frac{1}{1+a_1z^{-1}+a_2z^{-2}}e(t)
+\]
+\[
+    \hat{y}(t|t-1) = b_0u(t-1)+b_1u(t-2) - a_1y(t-1)-a_2y(t-2)
+\]
+
+If we use \gls{armax} models the numerator of the \gls{tf} for $e(t)$ is $1+c_1z^{-1}+\ldots+c_mz^{-m}$, in this case $J_N(\theta)$ is non-linear.
+This leads to the same complexity as \gls{sem}.
+
+The second problem of \gls{pem} is high sensitivity to sampling time choice.
+Remember that when we write at discrete time $y(t)$ we mean $y(t\cdot \Delta)$.
+
+\[
+    \hat{y}(t|t-1) = -a_1\tilde{y}(t-1)-a_2\tilde{y}(t-2) + b_0\tilde{u}(t-1)+b_1\tilde{u}(t-2)
+\]
+
+If $\Delta$ is very small the difference between $\tilde{y}(t)$ and $\tilde{y}(t-1)$ becomes very small.
+The effect is that the \gls{pem} optimization ends up providing this \emph{trivial} solution:
+\[
+    a_1 = -1 \qquad a_2 \rightarrow 0 \qquad b_0 \rightarrow 0 \qquad b_1 \rightarrow 0 \qquad \Rightarrow \qquad \tilde{y}(t) \approx \tilde{y}(t-1)
+\]
+
+This is a wrong model due to the fact that the recursive part of the model is using past measurements of the output instead of past values of the model output.
+
+\section{Summary of system identification methods for I/O systems}
+
+The problem is to estimate the model of a system $\Sc$ that, in general, could be non-linear and dynamic.
+\begin{figure}[H]
+    \centering
+    \begin{tikzpicture}[node distance=2cm,auto,>=latex']
+        \node[block, dashed border, minimum width=1.5cm, minimum height=1.5cm] at (0,0) (sys) {$\Sc$};
+        \draw[<-] (sys) -- ++(-1.5,0) node[left] {$u(t)$};
+        \draw[->] (sys) -- ++(1.5,0) node[right] {$y(t)$};
+    \end{tikzpicture}
+\end{figure}
+
+In order to do that we have to:
+\begin{itemize}
+    \item collect a dataset for training (supervised method): can be with or without design of the experiment
+    \item choose a model domain (linear static/non-linear static/linear dynamic/non-linear dynamic), using \gls{gb} or \gls{bb} approach
+    \item choose an estimation framework: constructive (4SID), parametric (\gls{pem} or \gls{sem}) or filtering (state extension of \gls{kf})
+\end{itemize}
+
+\begin{rem}[Final question]
+    For system identification and SW-sensing, is \acrlong{bb} or \acrlong{wb} better?
+
+    It depends on the goals and type of applications:
+
+    \begin{itemize}
+        \item \gls{bb} is very general and very flexible: it makes maximum use of data and needs little or no domain know-how.
+        \item \gls{wb} is very useful when you are the system-designer (not only the control algorithm designer): it can provide more insight in the system.
+        \item \gls{gb} can sometimes be obtained by hybrid systems (part is black-box and part is white-box).
+    \end{itemize}
+\end{rem}
\ No newline at end of file
diff --git a/lectures/template.tex b/lectures/template.tex
index 3bb316e..59d81a2 100644
--- a/lectures/template.tex
+++ b/lectures/template.tex
@@ -1,81 +1 @@
-\chapter{Template}
-\section{Table}
-
-\begin{table}[htp]
-    \centering
-    \begin{tabular}{r|l|p{10cm}}
-        Right &  Left  &  Longlonglonglonglonglonglonglong longlonglonglonglonglonglonglonglonglonglonglonglong longlonglonglonglonglong \\
-        Right &  Left  &  Longlonglonglonglonglonglong
-        longlonglonglonglonglonglonglong
-        longlonglonglong
-        longlonglonglonglonglonglonglong
-    \end{tabular}
-    \caption{This is a caption}
-    \label{tab:trans-sym}
-\end{table}
-
-\section{List}
-This is a List:
-\begin{itemize}
-    \item \textbf{Bullet 1}: Bullet 1 is bullet 1.
-    \item \textbf{Bullet 2}: Bullet 2 is bullet 2.
-\end{itemize}
-
-\section{Definition}
-\begin{definition}\label{def:def1}
-\textbf{DEFINITION NAME}: This is a definition.
-\end{definition}
-
-% avoid bad break
-\vspace{5cm}
-
-\section{Theorem}
-\begin{theo}[THEOREM NAME]{theo:theo1}
-This is a theorm. Below are equations.
-\begin{align}\label{eq:multi-equations}
-    \psi(\bvec{a}) &= A\cdot \bvec{a} + \bvec{t}.\\
-    R_x &=  \begin{bmatrix}
-            0 & \cos(\theta) & -\sin(\theta)\\
-            0 & \sin(\theta) & \cos(\theta)\\
-            1 & 0 & 0
-         \end{bmatrix},
-    R_y =  \begin{bmatrix}
-            \cos(\theta) & 0 & -\sin(\theta)\\
-            \sin(\theta) & 0 & \cos(\theta)\\
-            0 & 1 & 0
-         \end{bmatrix},
-    R_z =  \begin{bmatrix}
-            \cos(\theta) & -\sin(\theta) & 0\\
-            \sin(\theta) & \cos(\theta) & 0 \\
-            0 & 0 & 1
-         \end{bmatrix}
-\end{align}
-\end{theo}
-
-\begin{lem}[LEMMA NAME]{lem:leml}
-This is a lemma
-\end{lem}
-
-\begin{prf}[LEMMA NAME]{prf:leml}
-This is a proof.
-\end{prf}
-
-\section{Tikz Pictures}
-\begin{figure}[htp]
-    \centering
-        \begin{tikzpicture}[scale=0.6]
-            \draw[->] (0,-1)--(0,1.5)node[above] {$s$};
-            \draw[->] (-0.8,0.6) to[bend right] (0.8,0.6);
-            \draw[->] (0.9, 0.8)--(0.9, 1.2) node[right] {$\omega$};
-            \filldraw[dashed] (0,-0.2)--(0.9, -0.3) circle (1pt) node [right] {$q$};
-            \draw[->] (0.8, -0.9)--(1.2, -0.8) node[right] {$v$};
-        \end{tikzpicture}
-    \caption{This is a caption. }
-    \label{fig:rotation}
-\end{figure}
-
-
-
-
-
-\curinstructor{Ins Tructor1}
+%!TEX root = ../main.tex
diff --git a/lectures/2020-04-16.tex b/lectures_AY2020/2020-04-16.tex
similarity index 97%
rename from lectures/2020-04-16.tex
rename to lectures_AY2020/2020-04-16.tex
index df3ff0e..32fa842 100644
--- a/lectures/2020-04-16.tex
+++ b/lectures_AY2020/2020-04-16.tex
@@ -62,9 +62,9 @@ \subsection{Super summary of MIDA 1}
 The model is indicated as $\mathcal{M}(\theta)$ where $\theta$ is the parameter vector, the coefficients of $A(z)$, $B(z)$, $C(z)$.
 
 A \textbf{parametric identification method} has been used: the \emph{performance index is defined}
-\begin{definition}
+\begin{defn}
     $J(\theta) = \frac{1}{N} \sum_{t=1}^N \left(y(t) - \hat{y}(t|t-1, \theta)\right)^2$
-\end{definition}
+\end{defn}
 
 Which is the variance of the \emph{prediction error} made by the model. The optimal $\theta$ is $\hat{\theta}_N = \argmin_\theta J(\theta)$
 
@@ -84,9 +84,9 @@ \subsection{MIDA 2}
 
 \section{Motivation example for the course: ABS}
 
-\begin{definition}
+\begin{defn}
     \textbf{Slip} of the wheel: $\lambda = \frac{v-\omega r}{v}$
-\end{definition}
+\end{defn}
 
 During a break $0 \le \lambda \le 1$ (from free rolling wheel to locked wheel).
 \begin{figure}[H]
@@ -181,7 +181,7 @@ \chapter{Black-box non-parametric identification of I/O systems using state-spac
     \left\{y(1), y(2), \ldots, y(N)\right\} &\quad \text{(output)}
 \end{align*}
 
-\begin{remark}[general path of a parametric identification methods]
+\begin{rem}[general path of a parametric identification methods]
 
 \begin{enumerate}
     \item Collect data: $\left\{u(1), u(2), \ldots, u(N)\right\}$, $\left\{y(1), y(2), \ldots, y(N)\right\}$
@@ -194,7 +194,7 @@ \chapter{Black-box non-parametric identification of I/O systems using state-spac
 
 $\mathcal{M}(\theta_1)$ is better than $\mathcal{M}(\theta_2)$ if $J(\theta_1) < J(\theta_2)$.
 
-\end{remark}
+\end{rem}
 
 In this chapter we are presenting a totally different system identification approach: \textbf{not parametric}.
 \begin{itemize}
@@ -243,11 +243,11 @@ \subsection{Representation \#1: state-space}
 
 Assuming 1 input and 1 output, it can be extended for multiple inputs and outputs. Usually $D=0$ for \emph{strictly-proper systems}.
 
-\begin{remark}[S.S representation is not unique]
+\begin{rem}[S.S representation is not unique]
     $F_1 = TFT^{-1}$, $G_1 = TG$, $H_1 = HT^{-1}$, $D_1 = D$ for any invertible matrix $T$. The system $\{F, G, H, D\}$ is equivalent to $\{F_1, G_1, H_1, D_1\}$.
-\end{remark}
+\end{rem}
 
-\begin{example}
+\begin{exa}
     \[
         \begin{cases}
             x_1(t+1) = \frac{1}{2} x_1(t) + 2u(t) \\
@@ -276,7 +276,7 @@ \subsection{Representation \#1: state-space}
         & \qquad
         D = 0
     \end{align*}
-\end{example}
+\end{exa}
 
 \subsection{Representation \#2: transfer-function}
 
@@ -288,7 +288,7 @@ \subsection{Representation \#2: transfer-function}
 
 It's very easy to move from T.F. representation to a time domain description of the system.
 
-\begin{example}
+\begin{exa}
     \begin{align*}
         & y(t) = \underbrace{\begin{bmatrix}
             \frac{1+\frac{1}{2}z^{-1}}{2+\frac{1}{3}z^{-1}+\frac{1}{4}z^{-2}} z^{-1}
@@ -297,8 +297,8 @@ \subsection{Representation \#2: transfer-function}
         & y(t) = \underbrace{-\frac{1}{6}y(t-1) - \frac{1}{8}y(t-2)}_\text{old values of $y(t)$} + \underbrace{\frac{1}{2}u(t-1) + \frac{1}{4}u(t-2)}_\text{old values of input}
     \end{align*}
 
-\end{example}
-\begin{remark}[Notational remark]
+\end{exa}
+\begin{rem}[Notational remark]
     $\displaystyle W(z) = \frac{z^{-1}}{1 + \frac{1}{3}z^{-1}}$ is called an IIR (\emph{Infinite Impulse Response}) filter.\\
     $\displaystyle W(z) = z^{-1} + \frac{1}{2}z^{-2} + \frac{1}{4}z^{-3}$ is called a FIR (\emph{Finite Impulse Response}) filter.
-\end{remark}
+\end{rem}
diff --git a/lectures/2020-04-20.tex b/lectures_AY2020/2020-04-20.tex
similarity index 98%
rename from lectures/2020-04-20.tex
rename to lectures_AY2020/2020-04-20.tex
index 8d8492e..f935a2a 100644
--- a/lectures/2020-04-20.tex
+++ b/lectures_AY2020/2020-04-20.tex
@@ -1,5 +1,5 @@
 \newlecture{Sergio Savaresi}{20/04/2020}
-\begin{remark}[Strictly proper systems]
+\begin{rem}[Strictly proper systems]
     Notice that for strictly proper systems the delay $k \ge 1$
     \begin{figure}[H]
         \begin{minipage}[t]{0.5\textwidth}
@@ -30,7 +30,7 @@
             \end{tikzpicture}
         \end{minipage}
     \end{figure}
-\end{remark}
+\end{rem}
 
 \subsection{Representation \#3: convolution of the input with the Impulse Response (IR)}
 The third way to represent a system is through the convolution of the input with the \emph{Impulse Response (IR)}.
@@ -120,7 +120,7 @@ \subsection{State Space to Transfer Function}
 Thus, the transfer function is
 \[ W(z) = H(zI - F) ^ {-1} G \]
 
-\begin{example}
+\begin{exa}
 \begin{align*}
     F = \begin{bmatrix}
         1 & 0\\
@@ -193,7 +193,7 @@ \subsection{State Space to Transfer Function}
 \end{align*}
 Notice that in this case we only have one pole, but the system is of order two; this comes from the fact that part of the system is non observable.
 
-\end{example}
+\end{exa}
 
 \subsection{Transfer Function to State Space}
 This conversion is not very used in practice and it is called the \emph{realization} of a transfer function into a state space model.
@@ -229,7 +229,7 @@ \subsubsection{Control realization}
     &&
     D = 0
 \end{align*}
-\begin{example}
+\begin{exa}
     Consider the transfer function $W(z)$
     \[ W(z) = \frac{2z^2 + \frac{1}{2}z + \frac{1}{4}}{z^3 + \frac{1}{4}z^2 + \frac{1}{3}z + \frac{1}{5}} \]
     The control realization is
@@ -252,11 +252,11 @@ \subsubsection{Control realization}
         &&
         D = 0
     \end{align*}
-\end{example}
+\end{exa}
 
 \subsection{Transfer Function to Impulse Response}
 To get the IR from a transfer function $W(z)$ is sufficient to make the $\infty$-long division between the numerator and denominator of $W(z)$
-\begin{example}
+\begin{exa}
     Consider the transfer function
     \[ W(z) = \frac{1}{z-\frac{1}{2}} = \frac{z^{-1}}{1-\frac{1}{2}z^{-1}}
         = 0 z^{-0} + 1 z^{-1} + \frac{1}{2}z^{-2} + \frac{1}{4}z^{-3} + \cdots \]
@@ -267,22 +267,22 @@ \subsection{Transfer Function to Impulse Response}
     Remembering that for geometric series we have \[ \sum_{k = 0}^{\infty} a^k = \frac{1}{1-a} \text{ if } |a| < 1 \]
     We can rewrite $y(t)$ as follows
     \[ y(t) = \left( z^{-1} \sum_{k=0}^{\infty} \left( \frac{1}{2} z^{-1} \right)^{k} \right) u(t) = \left( 0 + 1 z^{-1} + \frac{1}{2}z^{-2} + \frac{1}{4}z^{-3} + \cdots \right) u(t) \]
-\end{example}
+\end{exa}
 
 \subsection{Impulse Response to Transfer Function}
-\begin{definition}
+\begin{defn}
     Given a discrete-time signal $s(t)$ such that $\forall t < 0: s(t) = 0$, its \emph{Z-Transform} is defined as
     \[ \mathcal{Z} \left( s(t) \right) = \sum_{t = 0}^{\infty} s(t) z^{-t} \]
-\end{definition}
+\end{defn}
 Given this, it can be proven that
 \[ W(z) = \mathcal{Z}\left( \omega(t) \right) = \sum_{t = 0}^{\infty} \omega(t) z^{-t} \]
 This means that the transfer function of a system is the $\mathcal{Z}$-transform of a special signal, that is the impulse response of the system.
 
-\begin{remark}
+\begin{rem}
     This formula cannot be used in practice to transform an IR representation to a TF representation.
     This is because we need infinite points of the impulse response, and the impulse response must be noise-free.
     Thus, this transformation is only theoretical.
-\end{remark}
+\end{rem}
 
 \subsection{State Space to Impulse Reponse}
 Consider the following state space model, with initial conditions $x(0) = 0$ and $y(0) = 0$
@@ -366,11 +366,11 @@ \section{Subspace-based State Space System Identification (4SID)}
         \item $\eta(t)$ is the measurement noise (e.g. WN)
     \end{itemize}
 \end{enumerate}
-\begin{remark}
+\begin{rem}
     We will see in detail only 4SID when the experiment is an impulse-experiment, which is the first and original version of 4SID.
     However 4SID can be extended to any generic input signal $\left\{ u(1), u(2), \cdots, u(N) \right\}$ that is sufficiently exciting.
-\end{remark}
-\begin{remark}[Unstable system]
+\end{rem}
+\begin{rem}[Unstable system]
     In case of an unstable system the measurements must be collected in a closed-loop experiment.
     Indeed, if the experiment was open-loop, the experiment would be unfeasible.
 
@@ -395,4 +395,4 @@ \section{Subspace-based State Space System Identification (4SID)}
         \end{tikzpicture}
         \caption*{Closed loop system}
     \end{figure}
-\end{remark}
+\end{rem}
diff --git a/lectures/2020-04-21.tex b/lectures_AY2020/2020-04-21.tex
similarity index 98%
rename from lectures/2020-04-21.tex
rename to lectures_AY2020/2020-04-21.tex
index de2a65f..18d047f 100644
--- a/lectures/2020-04-21.tex
+++ b/lectures_AY2020/2020-04-21.tex
@@ -50,14 +50,14 @@ \subsection{Fully Controllable}
 
 $R$ is also called \emph{reachability} matrix.
 
-\begin{remark}
+\begin{rem}
     \begin{description}
         \item[Observability] we can observe the state from the output sensors.
         \item[Controllability] we can control/move/influence the state using the input signal.
     \end{description}
-\end{remark}
+\end{rem}
 
-\begin{example}
+\begin{exa}
     \begin{align*}
         \begin{cases}
             x_1(t+1) = \frac{1}{2} x_1(t) + u(t) \\
@@ -104,9 +104,9 @@ \subsection{Fully Controllable}
         \rank R = 1 < n = 2
         \quad\implies\quad \text{not fully controllable}
     \]
-\end{example}
+\end{exa}
 
-\begin{remark}[4 sub-systems]
+\begin{rem}[4 sub-systems]
     Each system can be internally seen as 4 sub-systems as follows:
 
     \begin{figure}[H]
@@ -151,7 +151,7 @@ \subsection{Fully Controllable}
             \draw[->] (oc) -- (out);
         \end{tikzpicture}
     \end{figure}
-\end{remark}
+\end{rem}
 
 \section{Hankel matrix of order n}
 
@@ -260,9 +260,9 @@ \section{Algorithm to obtain $\hat{F}$, $\hat{G}$, $\hat{H}$ from a noise-free m
 
 In conclusion in a simple and constructive way we have estimated a State Space model of the system $\{\hat{H}, \hat{G}, \hat{F}\}$ starting from measured IR, using only $2n+1$ samples of IR.
 
-\begin{remark}
+\begin{rem}
     If the measurement is noisy all this process is useless.
-\end{remark}
+\end{rem}
 
 \section{Real problem}
 
@@ -307,7 +307,7 @@ \section{4SID procedure (with noise)}
 
 $\tilde{H}_{qd}$ is a $q\times d$ matrix. \textbf{Note} that $q+d-1$ must equal to $N$ so that we use all the data set.
 
-\begin{remark}[Choice of $q$ and $d$]
+\begin{rem}[Choice of $q$ and $d$]
     Hypothesis: $q<d$ so that $q+d-1=N$, so $q=N+1-d$.
     \begin{figure}[H]
         \centering
@@ -333,4 +333,4 @@ \section{4SID procedure (with noise)}
     If $q \ll d$ it's computationally less intensive.
 
     If $0.6d < q < d$ we get to a good enough result.
-\end{remark}
+\end{rem}
diff --git a/lectures/2020-04-22.tex b/lectures_AY2020/2020-04-22.tex
similarity index 97%
rename from lectures/2020-04-22.tex
rename to lectures_AY2020/2020-04-22.tex
index 1baa7dd..17ed99b 100644
--- a/lectures/2020-04-22.tex
+++ b/lectures_AY2020/2020-04-22.tex
@@ -24,16 +24,16 @@
 Where $\sigma_1$, $\sigma_2$, $\ldots$, $\sigma_q$ are the singular values of $\tilde{H}_{qd}$.
 Those are real, positive numbers, sorted in decreasing order ($\sigma_1 \ge \sigma_2 \ge \cdots \ge \sigma_q$).
 
-\begin{remark}
+\begin{rem}
     The singular values of a rectangular matrix are a \emph{sort of} eigenvalues of a square matrix.\\
     SVD is a \emph{sort of} diagonalization of a rectangular matrix.
-\end{remark}
+\end{rem}
 
-\begin{remark}
+\begin{rem}
     For a square matrix, $\text{eig}(A) = \text{roots}(\det(A-\lambda I))$. If $M$ is rectangular, $SV(M) = \sqrt{\text{eig}(MM^T)}$ (for non zero eigenvalues).
-\end{remark}
+\end{rem}
 
-\begin{remark}[How to compute SVD]
+\begin{rem}[How to compute SVD]
     The optimal numerical computation is not trivial. Use \texttt{svd(M)} in Matlab.
 
     Theoretical method for SVD computation is to make 2 diagonalization steps:
@@ -43,7 +43,7 @@
     \[
         \underbrace{\tilde{H}_{qd}^T \tilde{H}_{qd}}_{d\times d} = \tilde{V}\tilde{S}^T\tilde{S}\tilde{V}^T
     \]
-\end{remark}
+\end{rem}
 
 \paragraph{Step 3} Plot the singular values and cut-off the 3 matrices.
 
@@ -201,15 +201,15 @@
 
 \paragraph{Conclusion} Starting from a noisy I.R. $\{\widetilde{\omega}(1), \widetilde{\omega}(2), \ldots, \widetilde{\omega}(N)\}$ we have estimated a model $\{\hat{F}, \hat{G}, \hat{H}\}$ in a non parametric and constructive way.
 
-\begin{remark}
+\begin{rem}
     This method can be extended also to the case where the input signal is generic (i.e. not an impulse).
-\end{remark}
+\end{rem}
 
-\begin{remark}[Optimality of 4SID]
+\begin{rem}[Optimality of 4SID]
     The method is optimal in the sense that it makes the best possible rank reduction of $\tilde{H}_{qd}$.
-\end{remark}
+\end{rem}
 
-\begin{example}[Rank reduction]
+\begin{exa}[Rank reduction]
     In general there are infinite ways to make a rank reduction.
     \[
         \underbrace{\begin{bmatrix}
@@ -231,7 +231,7 @@
         \end{bmatrix}
     \]
     It's not the optimal rank reduction matrix, but it factors out a matrix with lower rank.
-\end{example}
+\end{exa}
 
 Our goal is to obtain the desired rank reduction by discarding the minimum amount of information contained in the original matrix.
 SVD makes exactly this: $\tilde{H}_{res,qd}$ is the minimum possible (in the sense of the \emph{Frobenius norm}).
@@ -239,18 +239,18 @@
     \left|\tilde{H}_{res,qd}\right|_F = \sqrt{\sum_{ij} \left(\tilde{H}_{res,qd}^{(ij)} \right)^2}
 \]
 
-\begin{remark}
+\begin{rem}
     4SID is a constructive method that can be implemented in a fully-automatic way, except for these steps:
     \begin{itemize}
         \item $q$ and $d$ selection (not critical)
         \item Choice of $n$ (typically supervised by the designer). It can be made automatic using a cross-validation method.
     \end{itemize}
-\end{remark}
+\end{rem}
 
-\begin{remark}
+\begin{rem}
     SVD was an historical turning point in machine learning algorithms because it allows:
     \begin{itemize}
         \item Very efficient compression of information.
         \item Very efficient separation of \emph{important} information from noise.
     \end{itemize}
-\end{remark}
+\end{rem}
diff --git a/lectures/2020-04-23.tex b/lectures_AY2020/2020-04-23.tex
similarity index 100%
rename from lectures/2020-04-23.tex
rename to lectures_AY2020/2020-04-23.tex
diff --git a/lectures/2020-04-27.tex b/lectures_AY2020/2020-04-27.tex
similarity index 98%
rename from lectures/2020-04-27.tex
rename to lectures_AY2020/2020-04-27.tex
index 58069c7..4c83675 100644
--- a/lectures/2020-04-27.tex
+++ b/lectures_AY2020/2020-04-27.tex
@@ -7,7 +7,7 @@
     \item Fit the estimated and modeled frequency response to obtain the optimal model
 \end{itemize}
 
-\begin{example}[Car steer dynamics]
+\begin{exa}[Car steer dynamics]
     \begin{figure}[H]
         \centering
         \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
@@ -66,7 +66,7 @@
         \item The driver controls $\delta_F$ which is a measurable disturbance, the system controls the rear steer
         \item Both $\delta_R$ and $\delta_F$ are control variables: application high performance autonomous car
     \end{enumerate}
-\end{example}
+\end{exa}
 
 \paragraph{Step 1} In the experiment design step we first have to select a set of excitation frequencies.
 
@@ -147,7 +147,7 @@
 \end{figure}
 
 
-\begin{remark}
+\begin{rem}
     The amplitudes $A_1$, $A_2$, \ldots, $A_H$ can be equal (constant) or, more frequently, they decrease as the frequency increases to fulfill the power constraint on the input.
 
     \paragraph{Example} $\delta(t)$ is the steering angle (moved by an actuator).
@@ -157,7 +157,7 @@
 
     If we have a limit to this power, this power should be constant during the $H$ experiments.
     \[KA_i^2\omega_i = \text{const} \qquad A_i=\sqrt{\frac{\text{const}}{K\omega_i}}\]
-\end{remark}
+\end{rem}
 
 Focusing on the $i$-th experiment.
 \begin{figure}[H]
@@ -178,9 +178,9 @@
     \end{tikzpicture}
 \end{figure}
 
-\begin{remark}
+\begin{rem}
     If the system is LTI (linear time-invariant), the frequency response theorem says the if the input is a sine input of frequency $\omega_i$ the output must be a sine with frequency $\omega_i$.
-\end{remark}
+\end{rem}
 
 However $y_i(t)$ in real applications is not a perfect sinusoid.
 \begin{itemize}
@@ -274,10 +274,10 @@
     \end{bmatrix}
 \]
 
-\begin{remark}[Model order selection]
+\begin{rem}[Model order selection]
     In this case the order is composed by 2 parameters $n$ and $p$.
     Use cross-validation approach (or visual fitting in the Bode diagram).
-\end{remark}
+\end{rem}
 
 \paragraph{Step 3} New performance index (frequency domain).
 
@@ -293,7 +293,7 @@
 
 Usually $J_H(\theta)$ is a non-quadratic and non-convex function, iterative optimization methods are needed.
 
-\begin{remark}[Frequency bandwidth selection $\omega_H =\; ?$]
+\begin{rem}[Frequency bandwidth selection $\omega_H =\; ?$]
     Theoretically the standard best solution should be $H$ points distributed uniformly from 0 to $\Omega_N$ (Nyquist).
 
     In practice it's better to concentrate the experimental effort in a smaller and more focused bandwidth.
@@ -313,4 +313,4 @@
     A rule of thumb: $\omega_H \approx 3\omega_c$
 
     \paragraph{Example} The ESC (electronic stability control) has an expected bandwidth of $\omega_c \approx 2 \text{Hz}$, so $\omega_H \approx 6\text{Hz}$.
-\end{remark}
+\end{rem}
diff --git a/lectures/2020-04-30.tex b/lectures_AY2020/2020-04-30.tex
similarity index 98%
rename from lectures/2020-04-30.tex
rename to lectures_AY2020/2020-04-30.tex
index 2b0afa2..262d67e 100644
--- a/lectures/2020-04-30.tex
+++ b/lectures_AY2020/2020-04-30.tex
@@ -1,6 +1,6 @@
 \newlecture{Sergio Savaresi}{30/04/2020}
 
-\begin{remark}[Emphasis on special frequency range]
+\begin{rem}[Emphasis on special frequency range]
     In some cases, between $\omega_1$ and $\omega_H$, we want to be more accurate in system identification in some frequency regions (typically around cut-off-frequency, around resonances).
 
     We can use different weights for different frequencies.
@@ -30,9 +30,9 @@
     \]
 
     Another \emph{trick}: more dense $\omega_i$ spacing in the frequency region of special interest (not really used).
-\end{remark}
+\end{rem}
 
-\begin{remark}[Single experiment]
+\begin{rem}[Single experiment]
     Sometimes the set of $H$ independent single-sinusoid experiments can be replaced by a long single ``sine-sweep'' experiment.
 
     \begin{figure}[H]
@@ -54,7 +54,7 @@
 
     We can fit the estimated $\hat{W}(e^{j\omega})$ with the model frequency response $W(e^{j\omega}, \theta)$ in the performance index.
     This experiment is quicker but has usually a lower signal-to-noise-ration.
-\end{remark}
+\end{rem}
 
 \section{Comparison between time domain (ARMAX) and frequency domain parametric methods}
 
diff --git a/lectures/2020-05-04.tex b/lectures_AY2020/2020-05-04.tex
similarity index 97%
rename from lectures/2020-05-04.tex
rename to lectures_AY2020/2020-05-04.tex
index 2e3d76b..4cd78c7 100644
--- a/lectures/2020-05-04.tex
+++ b/lectures_AY2020/2020-05-04.tex
@@ -7,7 +7,7 @@
     \item Quality of estimation error (\emph{noise of measurement} for the software sensor).
 \end{itemize}
 
-\begin{example}[Slip estimation for ABS/traction control]
+\begin{exa}[Slip estimation for ABS/traction control]
     \begin{figure}[H]
         \centering
         \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
@@ -127,9 +127,9 @@
             \draw[->] (algo) -- (algoout);
         \end{tikzpicture}
     \end{figure}
-\end{example}
+\end{exa}
 
-\begin{example}[State of charge estimation of a battery]
+\begin{exa}[State of charge estimation of a battery]
     \begin{figure}[H]
         \centering
         \begin{tikzpicture}[node distance=2.5cm,auto,>=latex']
@@ -156,7 +156,7 @@
     \]
     Where $I$ is the total amount of \emph{current} that can be extracted by the user of the battery.
     This solution is not feasible since it integrates the noise on $i(t)$.
-\end{example}
+\end{exa}
 
 \section{Kalman Filter on Basic Systems}
 
@@ -277,7 +277,7 @@ \subsection{Basic Solution}
     P(1) = \text{var}[x(1)] = P_0 & \qquad\text{DRE}
 \end{align*}
 
-\begin{remark}[Structure or $K(t)$ and D.R.E]
+\begin{rem}[Structure of $K(t)$ and D.R.E]
     Notice that $K(t)$ and DRE have  a \emph{blockset} structure having this form: $AP(t)B^T+N$
 
     There are 3 different types of blocks:
@@ -291,25 +291,25 @@ \subsection{Basic Solution}
         \text{gain} \qquad& (\text{mix})(\text{output})^{-1} \\
         \text{DRE} \qquad& (\text{state}) - (\text{mix})(\text{output})^{-1}(\text{mix})^T
     \end{align*}
-\end{remark}
+\end{rem}
 
-\begin{remark}[Riccati equation]
+\begin{rem}[Riccati equation]
     Riccati equation is a special type of nonlinear matrix difference equation.
 
     Notice that DRE is an autonomous, non-linear, discrete time, multi-variable system, described by a non-linear difference matrix equation.
     \[
         \text{DRE: } P(t+1) = f(P(t)) \qquad P(1) = P_0
     \]
-\end{remark}
+\end{rem}
 
-\begin{remark}[Existance of DRE]
+\begin{rem}[Existence of DRE]
     In order to guarantee the existance of DRE for all $t$ the only critical part is the inversion of the \emph{output} block:
     \[
         ( \underbrace{HP(t)H^T}_{\ge 0} + \underbrace{V_2}_{>0})^{-1} \qquad \text{Thanks to $V_2>0$ it's invertible}
     \]
-\end{remark}
+\end{rem}
 
-\begin{remark}[Meaning of $P(t)$]
+\begin{rem}[Meaning of $P(t)$]
     It can be proven that $P(t)$ has a very important meaning.
 
     \[
@@ -317,4 +317,4 @@ \subsection{Basic Solution}
     \]
 
     $P(t)$ is the covariance of the 1-step prediction error of the state.
-\end{remark}
+\end{rem}
diff --git a/lectures/2020-05-05.tex b/lectures_AY2020/2020-05-05.tex
similarity index 99%
rename from lectures/2020-05-05.tex
rename to lectures_AY2020/2020-05-05.tex
index 64e662d..0a6f282 100644
--- a/lectures/2020-05-05.tex
+++ b/lectures_AY2020/2020-05-05.tex
@@ -164,11 +164,11 @@ \subsection{Filter ($\hat{x}(t|t)$)}
     \text{DRE}& \text{ unchanged}
 \end{align*}
 
-\begin{remark}
+\begin{rem}
     These equations are valid under the restrictive assumption $V_{12} = 0$.
-\end{remark}
+\end{rem}
 
-\begin{remark}
+\begin{rem}
     Gain of K.F. in prediction form:
     \[
         K(t) = \left( FP(t)H^T \right) \left( HP(t)H^T+V_2 \right)^{-1}
@@ -178,7 +178,7 @@ \subsection{Filter ($\hat{x}(t|t)$)}
     \[
         K_0(t) = \left( \underline{\phantom{F}} P(t)H^T \right) \left( HP(t)H^T+V_2 \right)^{-1}
     \]
-\end{remark}
+\end{rem}
 
 
 \subsection{Time-varying systems}
@@ -206,12 +206,12 @@ \section{Asymptotic Solution of K.F.}
     \item Computational problem: $K(t)$ must be computed at each sampling time (e.g. every 5ms), including the inversion of $HP(t)H^T+V_2$ ($p\times p$ matrix).
 \end{itemize}
 
-\begin{remark}
+\begin{rem}
     For LTI: $x(t+1) = Fx(t) + Gu(t)$ the stability check considers the eigenvalues of $F$.
 
     For LTV: $x(t+1) = F(t)x(t) + G(t)u(t)$, even if all the eigenvalues of $F(t)$ are strictly inside the unit circle at any time, the system is not guaranteed to be asymptotically stable.
     In practice it is, if the time-variations are \emph{slow}, like in aging.
-\end{remark}
+\end{rem}
 
 Because of those problems in real/practical applications the asymptotic version of K.F. is preferred.
 
@@ -253,11 +253,11 @@ \section{Asymptotic Solution of K.F.}
 
 If $\overline{K}$ exists, the K.F. is asymptotically stable if and only if all the eigenvalues of $F-\overline{K}H$ are strictly inside the unit circle.
 
-\begin{remark}
+\begin{rem}
     The stability of the system $S$ is related to matrix $F$, whereas the stability of K.F. is related to matrix $F-\overline{K}H$.
 
     K.F. can be asymptotically stable even if the system is unstable.
-\end{remark}
+\end{rem}
 
 \paragraph{Existance of $\overline{K}$}
 
diff --git a/lectures/2020-05-07.tex b/lectures_AY2020/2020-05-07.tex
similarity index 99%
rename from lectures/2020-05-07.tex
rename to lectures_AY2020/2020-05-07.tex
index 31aec4e..7e92553 100644
--- a/lectures/2020-05-07.tex
+++ b/lectures_AY2020/2020-05-07.tex
@@ -13,7 +13,7 @@
     \item The corresponding $\overline{K}$ is s.t. the K.F. is asymptotically stable
 \end{itemize}
 
-\begin{recall}
+\begin{rem}
     \paragraph{Observability} the pair $(F, H)$ is observable if and only if
     \[
         O = \begin{bmatrix}
@@ -49,7 +49,7 @@
             \Gamma & F\Gamma & \cdots & F^{n-1}\Gamma
         \end{bmatrix}
     \]
-\end{recall}
+\end{rem}
 
 \paragraph{Second asymptotic theorem}
 
diff --git a/lectures/2020-05-11.tex b/lectures_AY2020/2020-05-11.tex
similarity index 99%
rename from lectures/2020-05-11.tex
rename to lectures_AY2020/2020-05-11.tex
index a5df5e0..667e160 100644
--- a/lectures/2020-05-11.tex
+++ b/lectures_AY2020/2020-05-11.tex
@@ -1,6 +1,6 @@
 \newlecture{Sergio Savaresi}{11/05/2020}
 
-\begin{remark}[White noise]
+\begin{rem}[White noise]
     In the formulas of Kalman Filter there is a requirement that $v_1(t)$ and $v_2(t)$ must be white noises.
     In many practical applications this assumption can be too demanding.
 
@@ -61,7 +61,7 @@
     \]
 
     We can apply K.F. to this system.
-\end{remark}
+\end{rem}
 
 \subsection{Extension to Non-Linear systems}
 
@@ -75,14 +75,14 @@ \subsection{Extension to Non-Linear systems}
 
 Where $f$ and $h$ are non-linear functions of $x(t)$ and $u(t)$ (smoothness class $C^1$ or higher).
 
-\begin{example}
+\begin{exa}
     \[
         S: \begin{cases}
             x(t+1) = \frac{1}{2} x^5(t) + u^3(t) + v_1(t) \\
             y(t) = e^{x(t)} + v_2(t)
         \end{cases}
     \]
-\end{example}
+\end{exa}
 
 How can we design a Kalman Filter in this case?
 
@@ -170,7 +170,7 @@ \subsection{Extension to Non-Linear systems}
     \item Compute $\hat{x}(t+1|t)$
 \end{itemize}
 
-\begin{remark}
+\begin{rem}
     \begin{itemize}
         \item EKF is very powerful since can be applied to non-linear systems
         \item Obviously EKF does not have a steady-state asymptotic solution
@@ -186,7 +186,7 @@ \subsection{Extension to Non-Linear systems}
         \item safety-critical applications
         \item mission-critical applications
     \end{itemize}
-\end{remark}
+\end{rem}
 
 \begin{exercise}[K.F. full procedure]
     \begin{figure}[H]
diff --git a/lectures/2020-05-12.tex b/lectures_AY2020/2020-05-12.tex
similarity index 99%
rename from lectures/2020-05-12.tex
rename to lectures_AY2020/2020-05-12.tex
index 199a9c6..0fe2a6a 100644
--- a/lectures/2020-05-12.tex
+++ b/lectures_AY2020/2020-05-12.tex
@@ -16,9 +16,9 @@ \subsection{Direct optimization of gain $K$}
     \item K.F. theory
 \end{enumerate}
 
-\begin{remark}
+\begin{rem}
     The system is not stable and the state equation is noise-free.
-\end{remark}
+\end{rem}
 
 \paragraph{First mehtod} Direct solution
 
diff --git a/lectures/2020-05-14.tex b/lectures_AY2020/2020-05-14.tex
similarity index 99%
rename from lectures/2020-05-14.tex
rename to lectures_AY2020/2020-05-14.tex
index cfb385f..42a3681 100644
--- a/lectures/2020-05-14.tex
+++ b/lectures_AY2020/2020-05-14.tex
@@ -50,7 +50,8 @@
 
 \section{Comparison between K.F. and B.B. software sensing}
 
-\begin{center}
+\begin{table}
+    \centering
     \begin{tabular}{l|c|c}
         & \textbf{K.F.} & \textbf{B.B.} \\
         \hline
@@ -61,7 +62,7 @@ \section{Comparison between K.F. and B.B. software sensing}
         Accuracy of the estimation & \color{green} Good & \color{green} Very Good \\
         Can be used also in case of un-measurable states & \color{green} Yes & \color{red} No \\
     \end{tabular}
-\end{center}
+\end{table}
 
 \section{Non-linear Systems}
 
@@ -91,9 +92,9 @@ \section{Non-linear Systems}
     \end{tikzpicture}
 \end{figure}
 
-\begin{remark}
+\begin{rem}
     In K.F. the E.K.F. extension uses the trick of a time-varying linear gain $K(t)$ but the obvious choice is a non-linear gain (static nonlinear function).
-\end{remark}
+\end{rem}
 
 The content of the box is:
 
@@ -223,7 +224,7 @@ \section{Non-linear Systems}
     \end{tikzpicture}
 \end{figure}
 
-\begin{remark}
+\begin{rem}
     Notice that in principle $\hat{x}(t)$ can depend on $y(t)$ whereas we know $\hat{x}(t)$ can only depend on $u(t-1)$ and past values.
 
     In case of a MIMO system with
@@ -250,7 +251,7 @@ \section{Non-linear Systems}
 
     The estimation of this function $f(\cdot, \theta)$ is much simpler than the estimation of a recurrent neural network.
     Moreover the stability is guaranteed (the system is F.I.R.).
-\end{remark}
+\end{rem}
 
 \paragraph{Architecture \#3} Static non-linear function plus linear dynamics but with a I.I.R. scheme
 
diff --git a/lectures/2020-05-18.tex b/lectures_AY2020/2020-05-18.tex
similarity index 99%
rename from lectures/2020-05-18.tex
rename to lectures_AY2020/2020-05-18.tex
index 8f34e9a..4daf96e 100644
--- a/lectures/2020-05-18.tex
+++ b/lectures_AY2020/2020-05-18.tex
@@ -111,7 +111,7 @@ \section{Using Kalman Filter}
 \paragraph{Notice} This trick can work in principle with any number of unknown parameters (e.g. 3 sensors, 10 states and 20 parameters).
 In practice it works well only on a limited number of parameters (e.g. 3 sensors, 5 states and 2 parameters).
 
-\begin{example}
+\begin{exa}
     \begin{figure}[H]
         \centering
         \begin{tikzpicture}[node distance=2cm,auto,>=latex']
@@ -202,4 +202,4 @@ \section{Using Kalman Filter}
     The system is ready for K.F. application: we get at the same time $\hat{x}(t)$ and $\hat{c}(t)$.
 
     Notice that we need Extended Kalman Filter: even if the original system was linear, state extension moved to a non-linear system.
-\end{example}
+\end{exa}
diff --git a/lectures/2020-05-19.tex b/lectures_AY2020/2020-05-19.tex
similarity index 99%
rename from lectures/2020-05-19.tex
rename to lectures_AY2020/2020-05-19.tex
index ef1a11b..fab4955 100644
--- a/lectures/2020-05-19.tex
+++ b/lectures_AY2020/2020-05-19.tex
@@ -67,7 +67,7 @@ \section{Using Simulation Error Method}
 
 Can S.E.M. be applied also to B.B. methods?
 
-\begin{example}
+\begin{exa}
     We collect data $\{ \tilde{u}(1), \tilde{u}(2), \dots, \tilde{u}(N) \}$ and $\{ \tilde{y}(1), \tilde{y}(2), \dots, \tilde{y}(N) \}$, we want to estimate from data the I/O model.
 
     \[
@@ -147,7 +147,7 @@ \section{Using Simulation Error Method}
     \end{align*}
 
     Notice that it's non-linear with respect to $\theta$.
-\end{example}
+\end{exa}
 
 P.E.M. approach looks much better, but do not forget the noise! P.E.M. is much less robust w.r.t. noise, we must include a model of the noise in the estimated model.
 We use ARMAX models.
diff --git a/lectures/2020-05-25.tex b/lectures_AY2020/2020-05-25.tex
similarity index 99%
rename from lectures/2020-05-25.tex
rename to lectures_AY2020/2020-05-25.tex
index 728facb..668e39d 100644
--- a/lectures/2020-05-25.tex
+++ b/lectures_AY2020/2020-05-25.tex
@@ -26,7 +26,7 @@ \chapter{Minimum Variance Control}
     \item $\frac{B(z)}{A(z)}$ is \emph{minimum phase}
 \end{itemize}
 
-\begin{remark}
+\begin{rem}
     $\frac{B(z)}{A(z)}$ is said to be \emph{minimum phase} if all the roots of $B(z)$ are strictly inside the unit circle.
 
     \begin{figure}[H]
@@ -59,7 +59,7 @@ \chapter{Minimum Variance Control}
     Also for human it's difficult, for example \emph{steer to roll} dynamics in a bicycle: if you want to steer left, you must first steer a little to the right and then turn left.
 
     Design of controller for non-minimum phase is difficult and requires special design techniques (no MVC but generalized MVC).
-\end{remark}
+\end{rem}
 
 The problem we wish to solve is optimal tracking of the desired behavior of output:
 \begin{figure}[H]
@@ -93,7 +93,7 @@ \chapter{Minimum Variance Control}
     \item We assume that $y^0(t)$ is known only up to time $t$ (present time), we have no preview of the future desired $y^0(t)$ ($y^0(t)$ is totally unpredictable).
 \end{itemize}
 
-\begin{remark}
+\begin{rem}
     There are 2 sub-classes of control problems:
     \begin{itemize}
         \item When $y^0(t)$ is constant or step-wise (regulation problem)
@@ -147,7 +147,7 @@ \chapter{Minimum Variance Control}
             \caption*{Tracking problem}
         \end{minipage}
     \end{figure}
-\end{remark}
+\end{rem}
 
 Bottom-up way of presenting M.V.C.
 
diff --git a/lectures/2020-05-27.tex b/lectures_AY2020/2020-05-27.tex
similarity index 98%
rename from lectures/2020-05-27.tex
rename to lectures_AY2020/2020-05-27.tex
index 5a4fb10..01bd85f 100644
--- a/lectures/2020-05-27.tex
+++ b/lectures_AY2020/2020-05-27.tex
@@ -1,6 +1,6 @@
 \newlecture{Sergio Savaresi}{27/05/2020}
 
-\begin{remark}
+\begin{rem}
     For stability let's recall a result of feedback system:
     \begin{figure}[H]
         \centering
@@ -23,7 +23,7 @@
         \item Build the \emph{characteristic polynomial} $\chi(z) = L_N(z) + L_D(z)$ (sum of numerator and denominator)
         \item Find the roots of $\chi(z)$, closed loop system is asymptotically stable iff all the roots of $\chi(z)$ are strictly inside the unit circle
     \end{itemize}
-\end{remark}
+\end{rem}
 
 \[
     L(z) = \frac{1}{b_0+b_1z^{-1}} \cdot \frac{z^{-1}(b_0+b_1z^{-1})}{1-az^{-1}} \cdot a
@@ -173,7 +173,7 @@ \subsection{Performance analysis}
 $y(t)$ exactly tracks/follows $y^0(t)$ but with $k$ steps of delay and it's disturbed by noise $E(z)e(t)$.
 This is the best possible solution.
 
-\begin{remark}
+\begin{rem}
     The closed-loop behavior is very simple.
 
     \begin{figure}[H]
@@ -219,7 +219,7 @@ \subsection{Performance analysis}
 
     The M.V. controller \emph{pushes} all the system poles into the non-observable and/or non-controllable parts of the system (it makes internal cancellations).
     It's not a problem since we verified it's internally asymptotically stable.
-\end{remark}
+\end{rem}
 
 \section{Main limits of Minimum Variance Controllers}
 
@@ -241,7 +241,7 @@ \section{Main limits of Minimum Variance Controllers}
     \item $Q(z)$ is a T.F. that makes a penalty to big values of $u(t)$
 \end{itemize}
 
-\begin{remark}[Reference model $P(t)$]
+\begin{rem}[Reference model $P(t)$]
     \begin{figure}[H]
         \centering
         \begin{tikzpicture}[node distance=2cm,auto,>=latex']
@@ -257,9 +257,9 @@ \section{Main limits of Minimum Variance Controllers}
     \end{figure}
 
     The typical goal is to obtain the best possible tracking $y(t) = y^0(t)$, however perfect tracking may not be the best solution, but the best solution is to track a \emph{reference model}: $y(t) = P(z)y^0(t)$.
-\end{remark}
+\end{rem}
 
-\begin{example}[Cruise control in a car]
+\begin{exa}[Cruise control in a car]
     \begin{figure}[H]
         \centering
         \begin{tikzpicture}[node distance=2cm,auto,>=latex']
@@ -313,4 +313,4 @@ \section{Main limits of Minimum Variance Controllers}
             \node[green, right] at (3,0.7) {$\scriptstyle P(z)$ \footnotesize to smooth the behavior};
         \end{tikzpicture}
     \end{figure}
-\end{example}
+\end{exa}
diff --git a/lectures/2020-06-03.tex b/lectures_AY2020/2020-06-03.tex
similarity index 98%
rename from lectures/2020-06-03.tex
rename to lectures_AY2020/2020-06-03.tex
index 2fa546d..96c7305 100644
--- a/lectures/2020-06-03.tex
+++ b/lectures_AY2020/2020-06-03.tex
@@ -1,7 +1,7 @@
 \chapter{Recursive Identification}
 \newlecture{Stefano Dattilo}{03/06/2020}
 
-\begin{recall}[ARX syatem]
+\begin{rem}[ARX system]
     \[
         y(t) = \frac{B(z)}{A(z)}u(t-1) + \frac{1}{A(z)}e(t) \qquad e(t) \sim WN(m_e, \lambda_e)
     \]
@@ -28,7 +28,7 @@ \chapter{Recursive Identification}
     \]
 
     \paragraph{Objective} Identify $\theta$ starting from an available dataset.
-\end{recall}
+\end{rem}
 
 \section{Least square}
 
@@ -175,7 +175,7 @@ \subsection{Third form}
     V(N) &= V(N-1) - \frac{V(N-1)\phi(N)\phi(N)^TV(N-1)}{1+\phi(N)^TV(N-1)\phi(N)}
 \end{align*}
 
-\begin{remark}
+\begin{rem}
     RLS is a rigorous version of LS (not an approximation), provided a correct initialization.
 
     \begin{figure}[H]
@@ -198,7 +198,7 @@ \subsection{Third form}
     \end{itemize}
 
     In practice $\hat{\theta}_0 = 0$ and $S(0) = I$, the error due to the \emph{wrong} initialization will expire with time.
-\end{remark}
+\end{rem}
 
 \section{Recursive Least Square with Forgetting Factor}
 
@@ -244,7 +244,7 @@ \section{Recursive Least Square with Forgetting Factor}
     S(N) &= \rho S(N-1) + \phi(N)\phi(N)^T
 \end{align*}
 
-\begin{remark}[Choice of $\rho$]
+\begin{rem}[Choice of $\rho$]
     \begin{figure}[H]
         \centering
         \begin{tikzpicture}[
@@ -267,4 +267,4 @@ \section{Recursive Least Square with Forgetting Factor}
     \end{figure}
 
     If $\rho \ll 1$ there's high tracking speed but low precision. With $\rho \approx 1$ there's low tracking speed but greater precision.
-\end{remark}
+\end{rem}
diff --git a/lectures/2020-06-04.tex b/lectures_AY2020/2020-06-04.tex
similarity index 99%
rename from lectures/2020-06-04.tex
rename to lectures_AY2020/2020-06-04.tex
index 29fc720..1b2c476 100644
--- a/lectures/2020-06-04.tex
+++ b/lectures_AY2020/2020-06-04.tex
@@ -149,7 +149,7 @@ \subsection*{D to A converter}
     D &= D
 \end{align*}
 
-\begin{remark}
+\begin{rem}
     How the poles of the continuous time system are transformed?
 
     Can be proved that the eigenvalues (poles) follow the \emph{sampling transformation rule}.
@@ -198,7 +198,7 @@ \subsection*{D to A converter}
     Unfortunately these hidden zeros are frequently outside the unit circle, which means that $G(z)$ is not minimum phase even if $G(s)$ is minimum phase.
 
     We need for instance GMVC to design the control system.
-\end{remark}
+\end{rem}
 
 Another simple discretization technique frequently used is the discretization of time-derivative $\dot{x}$.
 
@@ -350,7 +350,7 @@ \subsection*{D to A converter}
     \end{tikzpicture}
 \end{figure}
 
-\begin{remark}[Another way of managing the choice of $\Delta T$ w.r.t. the aliasing problem]
+\begin{rem}[Another way of managing the choice of $\Delta T$ w.r.t. the aliasing problem]
     \begin{figure}[H]
         \centering
         \begin{tikzpicture}[node distance=2cm,auto,>=latex',declare function={
@@ -431,4 +431,4 @@ \subsection*{D to A converter}
             \draw (6.5,0.05) -- (6.5,-0.05) node[below] {$f_S$};
         \end{tikzpicture}
     \end{figure}
-\end{remark}
+\end{rem}
diff --git a/lectures_AY2020/template.tex b/lectures_AY2020/template.tex
new file mode 100644
index 0000000..3bb316e
--- /dev/null
+++ b/lectures_AY2020/template.tex
@@ -0,0 +1,81 @@
+\chapter{Template}
+\section{Table}
+
+\begin{table}[htp]
+    \centering
+    \begin{tabular}{r|l|p{10cm}}
+        Right &  Left  &  Longlonglonglonglonglonglonglong longlonglonglonglonglonglonglonglonglonglonglonglong longlonglonglonglonglong \\
+        Right &  Left  &  Longlonglonglonglonglonglong
+        longlonglonglonglonglonglonglong
+        longlonglonglong
+        longlonglonglonglonglonglonglong
+    \end{tabular}
+    \caption{This is a caption}
+    \label{tab:trans-sym}
+\end{table}
+
+\section{List}
+This is a List:
+\begin{itemize}
+    \item \textbf{Bullet 1}: Bullet 1 is bullet 1.
+    \item \textbf{Bullet 2}: Bullet 2 is bullet 2.
+\end{itemize}
+
+\section{Definition}
+\begin{definition}\label{def:def1}
+\textbf{DEFINITION NAME}: This is a definition.
+\end{definition}
+
+% avoid bad break
+\vspace{5cm}
+
+\section{Theorem}
+\begin{theo}[THEOREM NAME]{theo:theo1}
+This is a theorem. Below are equations.
+\begin{align}\label{eq:multi-equations}
+    \psi(\bvec{a}) &= A\cdot \bvec{a} + \bvec{t}.\\
+    R_x &=  \begin{bmatrix}
+            0 & \cos(\theta) & -\sin(\theta)\\
+            0 & \sin(\theta) & \cos(\theta)\\
+            1 & 0 & 0
+         \end{bmatrix},
+    R_y =  \begin{bmatrix}
+            \cos(\theta) & 0 & -\sin(\theta)\\
+            \sin(\theta) & 0 & \cos(\theta)\\
+            0 & 1 & 0
+         \end{bmatrix},
+    R_z =  \begin{bmatrix}
+            \cos(\theta) & -\sin(\theta) & 0\\
+            \sin(\theta) & \cos(\theta) & 0 \\
+            0 & 0 & 1
+         \end{bmatrix}
+\end{align}
+\end{theo}
+
+\begin{lem}[LEMMA NAME]{lem:leml}
+This is a lemma
+\end{lem}
+
+\begin{prf}[LEMMA NAME]{prf:leml}
+This is a proof.
+\end{prf}
+
+\section{Tikz Pictures}
+\begin{figure}[htp]
+    \centering
+        \begin{tikzpicture}[scale=0.6]
+            \draw[->] (0,-1)--(0,1.5)node[above] {$s$};
+            \draw[->] (-0.8,0.6) to[bend right] (0.8,0.6);
+            \draw[->] (0.9, 0.8)--(0.9, 1.2) node[right] {$\omega$};
+            \filldraw[dashed] (0,-0.2)--(0.9, -0.3) circle (1pt) node [right] {$q$};
+            \draw[->] (0.8, -0.9)--(1.2, -0.8) node[right] {$v$};
+        \end{tikzpicture}
+    \caption{This is a caption. }
+    \label{fig:rotation}
+\end{figure}
+
+
+
+
+
+\curinstructor{Ins Tructor1}
diff --git a/main.tex b/main.tex
new file mode 100644
index 0000000..87dc35a
--- /dev/null
+++ b/main.tex
@@ -0,0 +1,284 @@
+\documentclass[10pt,a4paper,twoside]{book}
+
+\input{style}
+
+\usepackage[
+  left=2.5cm, % inner
+  right=2.5cm, % outer
+  top=2.5cm,
+  bottom=3cm,
+  %showframe,
+  ]{geometry}
+
+\usepackage{xr} % cross-referencing 
+\makenoidxglossaries
+
+% Convenience commands
+
+% Integration differentials: the upright d (\de, defined in style.tex) followed by the variable
+\newcommand{\dl}{\de l}
+\newcommand{\dr}{\de r}
+\newcommand{\dxi}{\de \xi}
+\newcommand{\drho}{\de \rho}
+
+% Differentials of vector variables. NOTE(review): \x, \y and \sigg are not defined in main.tex or style.tex -- confirm they are defined before these macros are used
+\newcommand{\dxx}{\de \x}
+\newcommand{\dyy}{\de \y}
+\newcommand{\dsig}{\de \sigg}
+
+\allowdisplaybreaks[4] % Allow display equations to break across pages
+
+% MIDA1 acronym
+\newacronym{sp}{SP}{Stochastic Process}
+\newacronym{ssp}{SSP}{Stationary Stochastic Process}
+\newacronym{ma}{MA}{Moving Average}
+\newacronym{ar}{AR}{Auto Regressive}
+\newacronym{arx}{ARX}{Auto Regressive with Exogenous Input}
+\newacronym{arma}{ARMA}{Auto Regressive Moving Average}
+\newacronym{armax}{ARMAX}{Auto Regressive Moving Average with Exogenous Input}
+\newacronym{mse}{MSE}{Mean Square Error}
+\newacronym{wn}{WN}{White Noise}
+\newacronym{pem}{PEM}{Prediction Error Minimization}
+
+% MIDA2 acronym
+\newacronym{tf}{TF}{Transfer Function}
+\newacronym{ss}{SS}{State-Space}
+\newacronym{ir}{IR}{Impulse Response}
+\newacronym{bb}{BB}{Black-Box}
+\newacronym{wb}{WB}{White-Box}
+\newacronym{gb}{GB}{Gray-Box}
+\newacronym{kf}{KF}{Kalman Filter}
+\newacronym{dre}{DRE}{Difference Riccati Equation}
+\newacronym{are}{ARE}{Algebraic Riccati Equation}
+\newacronym{sem}{SEM}{Simulation Error Method}
+
+% commonly used operators
+\DeclareMathOperator{\WN}{WN}
+
+\begin{document}
+
+\frontmatter
+
+\pagestyle{empty}
+
+% TITLE PAGE
+
+\hypertarget{mytitlepage}{} % set the hypertarget
+\bookmark[dest=mytitlepage,level=chapter]{Title Page} % add the bookmark
+
+\vspace*{\fill}
+\begin{center}
+	{\large \textsc{Lecture Notes of}}\\
+	
+  \vspace*{0.4cm}
+	
+  {\Huge
+  \textsc{Model Identification}\\
+	\vspace*{0.4cm}
+	\textsc{and Data Analysis}}\\
+	\vspace*{0.4cm}
+	{\huge \textsc{Part 2}}\\
+	
+  \vspace*{1cm}
+	
+  {\large {From Professor Sergio Savaresi's lectures}}\\
+	
+  \vspace*{1cm}
+	
+  {\large
+  Author\\
+  \vspace*{0.1cm}
+  \textsc{Edoardo Morassutto}\\
+  
+  \vspace*{0.4cm}
+  
+  Contributors\\
+  \vspace*{0.1cm}
+  \textsc{Marco Donadoni}\\
+  \textsc{Cosimo Russo}\\
+  \textsc{Federico Cazzola}\\
+  
+  \vspace*{0.4cm}
+	
+  Reviewed for the 2022 edition of the course by\\
+  \vspace*{0.1cm}
+  \textsc{Andrea Bosisio}\\
+
+  \vspace*{0.4cm}
+  
+  Document formatting by\\
+  \vspace*{0.1cm}
+  \textsc{Teo Bucci}}\\
+	
+  \vspace*{1cm}
+	
+  Politecnico di Milano\\A.Y. 2021/2022
+\end{center}
+\vspace*{\fill}
+\clearpage
+
+% COPYRIGHT PAGE
+
+\hypertarget{mycopyright}{} % set the hypertarget
+\bookmark[dest=mycopyright,level=chapter]{Copyright Page} % add the bookmark
+\input{firstpages/copyright}
+\clearpage
+
+% PREFACE
+
+% \hypertarget{mypreface}{} % set the hypertarget
+% \bookmark[dest=mypreface,level=chapter]{Preface} % add the bookmark
+% \input{firstpages/preface}
+% \clearpage
+
+% CONTENTS
+
+\cleardoublepage
+\pagestyle{toc}
+\hypertarget{mytoc}{} % set the hypertarget
+\bookmark[dest=mytoc,level=chapter]{\contentsname} % add the bookmark
+\tableofcontents
+\cleardoublepage
+
+% MAIN MATTER
+
+\pagestyle{fancy}
+\mainmatter
+
+%%TIKZ
+
+\usetikzlibrary{matrix,shapes.geometric}
+
+\tikzset{block/.style={draw, rectangle, inner sep=6pt}} % rectangular node style (\tikzstyle is deprecated in favour of \tikzset)
+\tikzset{every node/.style={font=\small}} % every node is typeset in \small
+\tikzset{sum/.style={draw, circle, inner sep=3pt, minimum size =0.1cm}} % small circular node style
+
+% Cross: node drawn with the "cross out" shape; the style argument sets its size
+\tikzset{cross/.style={cross out, draw=black, minimum size=2*(#1-\pgflinewidth), inner sep=0pt, outer sep=0pt},
+  % default argument is 0.1cm
+  cross/.default={0.1cm}}
+
+% double border
+\tikzset{double border/.style={double, double distance=0.5mm}}
+% Hatched-pattern border. The code below uses @-names (\pgf@decorate@..., \tikz@pattern@color); this works here only because style.tex issues \makeatletter and never \makeatother
+\newcounter{tmp} % segment counter: reset and stepped by the "contour lineto closed" decoration
+\newif\ifpathisclosed % set (globally) to true when the decorated path ends with a closepath
+\tikzset{dashed border/.style={
+    preaction={decoration={contour lineto closed, contour distance=2pt},
+      decorate,
+    },
+    postaction={
+      insert path={%
+        \pgfextra{%
+          \pgfinterruptpath
+          \path[pattern=north west lines, pattern color=black,even odd rule]
+          \mySecondList \myList
+          ;
+        \endpgfinterruptpath}
+    }},
+}}
+\def\pgfdecoratedcontourdistance{0pt}
+\pgfset{
+  decoration/contour distance/.code=%
+\pgfmathsetlengthmacro\pgfdecoratedcontourdistance{#1}}
+\pgfdeclaredecoration{contour lineto closed}{start}{%
+  \state{start}[
+  next state=draw,
+  width=0pt,
+  persistent precomputation=\let\pgf@decorate@firstsegmentangle\pgfdecoratedangle]{%
+    %\xdef\myList{}\xdef\mySecondList{}%
+    \setcounter{tmp}{0}%
+    \global\pathisclosedfalse%
+    \pgfpathmoveto{\pgfpointlineattime{.5}
+      {\pgfqpoint{0pt}{\pgfdecoratedcontourdistance}}
+    {\pgfqpoint{\pgfdecoratedinputsegmentlength}{\pgfdecoratedcontourdistance}}}%
+  }%
+  \state{draw}[next state=draw, width=\pgfdecoratedinputsegmentlength]{%
+    \ifpgf@decorate@is@closepath@%
+      \pgfmathsetmacro\pgfdecoratedangletonextinputsegment{%
+      -\pgfdecoratedangle+\pgf@decorate@firstsegmentangle}%
+    \fi
+    \pgfmathsetlengthmacro\pgf@decoration@contour@shorten{%
+    -\pgfdecoratedcontourdistance*cot(-\pgfdecoratedangletonextinputsegment/2+90)}%
+    \pgfpathlineto
+    {\pgfpoint{\pgfdecoratedinputsegmentlength+\pgf@decoration@contour@shorten}
+    {\pgfdecoratedcontourdistance}}%
+    \stepcounter{tmp}%
+    \pgfcoordinate{muemmel\number\value{tmp}}{\pgfpoint{\pgfdecoratedinputsegmentlength+\pgf@decoration@contour@shorten}
+    {\pgfdecoratedcontourdistance}}%
+    \pgfcoordinate{feep\number\value{tmp}}{\pgfpoint{\pgfdecoratedinputsegmentlength}{0pt}}%
+    \ifnum\value{tmp}=1\relax%
+     \pgfcoordinate{muemmel0}{\pgfpoint{0pt}{\pgfdecoratedcontourdistance}}%
+     \pgfcoordinate{feep0}{\pgfpoint{0pt}{0pt}}%
+     \xdef\myList{(muemmel\number\value{tmp})}%
+     \xdef\mySecondList{(feep\number\value{tmp})}%
+    \else
+     \xdef\myList{\myList -- (muemmel\number\value{tmp})}%
+     \xdef\mySecondList{(feep\number\value{tmp}) -- \mySecondList}%
+    \fi
+    \ifpgf@decorate@is@closepath@%
+      \pgfpathclose
+      \global\pathisclosedtrue%
+    \fi
+  }%
+  \state{final}{%\typeout{\myList,\mySecondList}%
+    \ifpathisclosed%
+      \xdef\myList{\myList -- cycle}%
+      \xdef\mySecondList{\mySecondList -- cycle}%
+      %\typeout{closed \mySecondList \myList }
+    \else
+      %\typeout{\number\value{tmp}}%
+      \xdef\myList{(muemmel0) -- \myList -- cycle}%
+      \xdef\mySecondList{\mySecondList -- (feep0) --}%
+      %\typeout{not closed \mySecondList \myList }%
+    \fi
+  }%
+}
+\tikzset{
+  contour/.style={
+    decoration={
+      name=contour lineto closed,
+      contour distance=#1
+    },
+decorate}}
+
+% pattern
+\tikzset{
+  hatch distance/.store in=\hatchdistance,
+  hatch distance=10pt,
+  hatch thickness/.store in=\hatchthickness,
+  hatch thickness=0.2pt
+}
+\pgfdeclarepatternformonly[\hatchdistance,\hatchthickness]{flexible hatch}
+{\pgfqpoint{0pt}{0pt}}
+{\pgfqpoint{\hatchdistance}{\hatchdistance}}
+{\pgfpoint{\hatchdistance-1pt}{\hatchdistance-1pt}}%
+{
+  \pgfsetcolor{\tikz@pattern@color}
+  \pgfsetlinewidth{\hatchthickness}
+  \pgfpathmoveto{\pgfqpoint{0pt}{0pt}}
+  \pgfpathlineto{\pgfqpoint{\hatchdistance}{\hatchdistance}}
+  \pgfusepath{stroke}
+}
+
+%\tikzstyle{input} = [coordinate]
+%\tikzstyle{output} = [coordinate]
+%\tikzstyle{pinstyle} = [pin edge={to-,thin,black}]
+
+\input{lectures/2022_04_06}
+\input{lectures/2022_04_07}
+\input{lectures/2022_04_11}
+\input{lectures/2022_04_12}
+\input{lectures/2022_04_14}
+\input{lectures/2022_04_20}
+\input{lectures/2022_04_21}
+\input{lectures/2022_04_27}
+\input{lectures/2022_05_02}
+\input{lectures/2022_05_09}
+\input{lectures/2022_05_12}
+
+\glsaddall
+\printnoidxglossary[type=\acronymtype,title=Glossary, toctitle=Glossary, nonumberlist]
+\cleardoublepage
+
+\end{document}
diff --git a/style.tex b/style.tex
new file mode 100644
index 0000000..168eee9
--- /dev/null
+++ b/style.tex
@@ -0,0 +1,428 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Template Dispense
+% Autore: Teo Bucci
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%---------------------------
+% FONTS AND LANGUAGE
+%---------------------------
+
+\usepackage[T1]{fontenc}
+\usepackage[utf8]{inputenc}
+\usepackage[english]{babel}
+
+%---------------------------
+% PACKAGES
+%---------------------------
+
+\usepackage{dsfont} % for using \mathds{1} characteristic function
+\usepackage{amsmath, amssymb, amsthm} % amssymb also loads amsfonts
+\usepackage{latexsym}
+
+\usepackage{booktabs}
+\usepackage{pgfplots}
+\usepackage{tikz}
+\usetikzlibrary{
+  positioning,
+  shapes.misc,
+  intersections,
+  shapes.symbols,
+  patterns,
+  fadings,
+  shadows.blur,
+  decorations.pathreplacing,
+  arrows.meta,
+  arrows
+}
+\usepackage{mathdots}
+\usepackage{cancel}
+\usepackage{color}
+\usepackage{siunitx}
+\usepackage{array}
+\usepackage{multirow}
+\usepackage{makecell}
+\usepackage{tabularx}
+\usepackage{caption}
+\captionsetup{belowskip=12pt,aboveskip=4pt}
+\usepackage{subcaption}
+\usepackage{placeins} % \FloatBarrier
+\usepackage{flafter}  % The flafter package ensures that floats don't appear until after they appear in the code.
+\usepackage[shortlabels]{enumitem}
+\usepackage[english]{varioref}
+\renewcommand{\ref}{\vref}
+
+%---------------------------
+% FIGURE INCLUSION
+%---------------------------
+
+\usepackage{import}
+\usepackage{pdfpages}
+\usepackage{transparent}
+\usepackage{xcolor}
+\usepackage{graphicx}
+\graphicspath{ {./images/} } % Path relative to the main .tex file
+\usepackage{float}
+
+\newcommand{\fg}[3][\relax]{% \fg[<caption>]{<width as a fraction of \textwidth>}{<image file>}: centered figure placed exactly here ([H])
+  \begin{figure}[H]%[htp]%
+    \centering
+    \captionsetup{width=0.7\textwidth}
+      \includegraphics[width = #2\textwidth]{#3}%
+      \ifx\relax#1\else\caption{#1}\fi % no caption when the optional argument is left at its default (\relax)
+      \label{#3} % the image file name doubles as the label key
+  \end{figure}%
+  \FloatBarrier%
+}
+
+%---------------------------
+% PARAGRAPHS AND LINES
+%---------------------------
+
+\usepackage[none]{hyphenat} % no hyphenation
+
+\emergencystretch 3em % to prevent the text from going beyond margins
+
+\usepackage[skip=0.2\baselineskip+2pt]{parskip}
+
+% \renewcommand{\baselinestretch}{1.5} % line spacing
+
+%---------------------------
+% HEADERS AND FOOTERS
+%---------------------------
+
+\usepackage{fancyhdr}
+
+\fancypagestyle{toc}{%
+\fancyhf{}%
+\fancyfoot[C]{\thepage}%
+\renewcommand{\headrulewidth}{0pt}%
+\renewcommand{\footrulewidth}{0pt}
+}
+
+\fancypagestyle{fancy}{%
+\fancyhf{}%
+\fancyhead[RE]{\nouppercase{\leftmark}}%
+\fancyhead[LO]{\nouppercase{\rightmark}}%
+\fancyhead[LE,RO]{\thepage}%
+\renewcommand{\footrulewidth}{0pt}%
+\renewcommand{\headrulewidth}{0.4pt}
+}
+
+% Redefine \cleardoublepage: when the page reached after \clearpage is even, emit a blank page with the empty page style (no header/footer) so the following content starts on an odd page
+\makeatletter % NOTE(review): there is no matching \makeatother in this file; later @-macros here and in main.tex depend on that
+\renewcommand{\cleardoublepage}{
+\clearpage\ifodd\c@page\else
+\hbox{}
+\vspace*{\fill}
+\thispagestyle{empty}
+\newpage
+\fi}
+
+%---------------------------
+% CUSTOM
+%---------------------------
+
+\usepackage{xspace}
+\newcommand{\latex}{\LaTeX\xspace}
+\newcommand{\tex}{\TeX\xspace}
+
+\newcommand{\Tau}{\mathcal{T}}
+\newcommand{\Ind}{\mathds{1}} % indicator function
+
+\newcommand{\transpose}{^{\mathrm{T}}}
+\newcommand{\complementary}{^{\mathrm{C}}} % superscript C; alternatives: ^{\mathrm{C}} ^{\mathrm{c}} ^{\mathsf{c}}
+\newcommand{\degree}{^\circ\text{C}} % degree symbol followed by an upright C
+
+\newcommand{\notimplies}{\mathrel{{\ooalign{\hidewidth$\not\phantom{=}$\hidewidth\cr$\implies$}}}}
+\newcommand{\questeq}{\overset{?}{=}} % "is it true that ...?" (equals sign with a question mark above)
+
+\newcommand{\indep}{\perp \!\!\! \perp} % independence
+\newcommand{\iid}{\stackrel{\mathrm{iid}}{\sim}}
+\newcommand{\event}[1]{\emph{``#1''}} % event (emphasized, in quotes)
+
+% variants of the "=" symbol
+\newcommand{\iideq}{\overset{\text{\tiny iid}}{=}}
+\newcommand{\ideq}{\overset{\text{\tiny id}}{=}}
+\newcommand{\indepeq}{\overset{\perp \!\!\! \perp}{=}}
+
+\newcommand{\boxedText}[1]{\noindent\fbox{\parbox{\textwidth}{#1}}}
+
+\renewcommand{\emptyset}{\varnothing}
+\renewcommand{\tilde}{\widetilde}
+\renewcommand{\hat}{\widehat}
+
+\DeclareMathOperator{\sgn}{sgn}
+\DeclareMathOperator{\Var}{Var}
+\DeclareMathOperator{\Cov}{Cov}
+\DeclareMathOperator*{\rank}{rank}
+\DeclareMathOperator*{\eig}{eig}
+\DeclareMathOperator{\tr}{tr}
+\DeclareMathOperator{\Grad}{grad}
+\DeclareMathOperator{\Div}{div}
+\DeclareMathOperator{\Span}{span}
+\let\Im\undefined  % redefine \Im
+\DeclareMathOperator{\Im}{Im}
+\DeclareMathOperator{\Ker}{Ker}
+\DeclareMathOperator*{\argmin}{arg\,min}
+\DeclareMathOperator*{\argmax}{arg\,max}
+\DeclareMathOperator*{\esssup}{ess\ sup}
+\DeclareMathOperator*{\essinf}{ess\ inf}
+\DeclareMathOperator*{\supp}{supp}
+
+\newcommand{\eps}{\varepsilon}
+
+\usepackage{mathtools} % Needed for the \DeclarePairedDelimiter commands below
+\DeclarePairedDelimiter{\abs}{\lvert}{\rvert} % absolute value
+\DeclarePairedDelimiter{\norm}{\lVert}{\rVert} % norm
+\DeclarePairedDelimiter{\sca}{\langle}{\rangle} % scalar product
+
+% Blackboard bold
+\renewcommand{\AA}{\mathbb A}
+\newcommand{\BB}{\mathbb{B}}
+\newcommand{\CC}{\mathbb{C}}
+\newcommand{\DD}{\mathbb{D}}
+\newcommand{\EE}{\mathbb{E}}
+\newcommand{\FF}{\mathbb{F}}
+\newcommand{\GG}{\mathbb{G}}
+\newcommand{\HH}{\mathbb{H}}
+\newcommand{\II}{\mathbb{I}}
+\newcommand{\JJ}{\mathbb{J}}
+\newcommand{\KK}{\mathbb{K}}
+\newcommand{\LL}{\mathbb{L}}
+\newcommand{\MM}{\mathbb{M}}
+\newcommand{\NN}{\mathbb{N}}
+\newcommand{\OO}{\mathbb{O}}
+\newcommand{\PP}{\mathbb{P}}
+\newcommand{\QQ}{\mathbb{Q}}
+\newcommand{\RR}{\mathbb{R}}
+\renewcommand{\SS}{\mathbb S}
+\newcommand{\TT}{\mathbb{T}}
+\newcommand{\UU}{\mathbb{U}}
+\newcommand{\VV}{\mathbb{V}}
+\newcommand{\WW}{\mathbb{W}}
+\newcommand{\XX}{\mathbb{X}}
+\newcommand{\YY}{\mathbb{Y}}
+\newcommand{\ZZ}{\mathbb{Z}}
+
+% Calligraphic
+\newcommand{\Ac}{\mathcal{A}}
+\newcommand{\Bc}{\mathcal{B}}
+\newcommand{\Cc}{\mathcal{C}}
+\newcommand{\Dc}{\mathcal{D}}
+\newcommand{\Ec}{\mathcal{E}}
+\newcommand{\Fc}{\mathcal{F}}
+\newcommand{\Gc}{\mathcal{G}}
+\newcommand{\Hc}{\mathcal{H}}
+\newcommand{\Ic}{\mathcal{I}}
+\newcommand{\Jc}{\mathcal{J}}
+\newcommand{\Kc}{\mathcal{K}}
+\newcommand{\Lc}{\mathcal{L}}
+\newcommand{\Mc}{\mathcal{M}}
+\newcommand{\Nc}{\mathcal{N}}
+\newcommand{\Oc}{\mathcal{O}}
+\newcommand{\Pc}{\mathcal{P}}
+\newcommand{\Qc}{\mathcal{Q}}
+\newcommand{\Rc}{\mathcal{R}}
+\newcommand{\Sc}{\mathcal{S}}
+\newcommand{\Tc}{\mathcal{T}}
+\newcommand{\Uc}{\mathcal{U}}
+\newcommand{\Vc}{\mathcal{V}}
+\newcommand{\Wc}{\mathcal{W}}
+\newcommand{\Xc}{\mathcal{X}}
+\newcommand{\Yc}{\mathcal{Y}}
+\newcommand{\Zc}{\mathcal{Z}}
+
+% Differentials: the \dX macros below expand to \dspace followed by an upright d and the variable
+\newcommand{\dspace}{\,} % \, inserts a thin space
+\newcommand{\de}{\mathrm{d}} % upright differential d
+\newcommand{\dx}{\dspace \de x}
+\newcommand{\dy}{\dspace \de y}
+\newcommand{\dt}{\dspace \de t}
+\newcommand{\ds}{\dspace \de s}
+\newcommand{\dz}{\dspace \de z}
+\newcommand{\dw}{\dspace \de w}
+\newcommand{\du}{\dspace \de u}
+\newcommand{\dv}{\dspace \de v}
+\newcommand{\dteta}{\dspace \de \vartheta}
+\newcommand{\dxy}{\dspace \de x \de y}
+\newcommand{\duv}{\dspace \de u \de v}
+\newcommand{\dst}{\dspace \de s \de t}
+\newcommand{\dP}{\dspace \de P}
+\newcommand{\dPP}{\dspace \de \PP}
+
+\newcommand{\SDP}{(\Omega,\Ac,\PP)} % probability space
+\newcommand{\Cz}{\Cc^0}
+\newcommand{\Cu}{\Cc^1}
+\newcommand{\Lu}{\mathcal{L}^1}
+
+\newcommand{\fXY}{f_{(X,Y)}}
+\newcommand{\fXYxy}{\fXY(x,y)}
+
+% spacing, see https://tex.stackexchange.com/questions/438612/space-between-exists-and-forall
+% adds a thin space after \forall
+\let\oldforall\forall
+\renewcommand{\forall}{\oldforall \, }
+% adds a medium space after \exists
+\let\oldexist\exists
+\renewcommand{\exists}{\oldexist \: }
+% defines \existu for "there exists a unique" (\exists followed by !)
+\newcommand\existu{\oldexist! \: }
+
+%---------------------------
+% APPENDIX
+%---------------------------
+
+\usepackage[title,titletoc]{appendix}
+
+%---------------------------
+% THEOREMS
+%---------------------------
+
+\definecolor{grey245}{RGB}{245,245,245}
+
+\newtheoremstyle{blacknumbox} % Theorem style name
+{0pt}% Space above
+{0pt}% Space below
+{\normalfont}% Body font
+{}% Indent amount
+{\bf\scshape}% Theorem head font --- {\small\bf}
+{.\;}% Punctuation after theorem head
+{0.25em}% Space after theorem head
+{\small\thmname{#1}\nobreakspace\thmnumber{\@ifnotempty{#1}{}\@upn{#2}}% Theorem text (e.g. Theorem 2.1)
+%{\small\thmname{#1}% Theorem text (e.g. Theorem)
+\thmnote{\nobreakspace\the\thm@notefont\normalfont\bfseries---\nobreakspace#3}}% Optional theorem note
+
+\newtheoremstyle{unnumbered} % Theorem style name
+{0pt}% Space above
+{0pt}% Space below
+{\normalfont}% Body font
+{}% Indent amount
+{\bf\scshape}% Theorem head font --- {\small\bf}
+{.\;}% Punctuation after theorem head
+{0.25em}% Space after theorem head
+{\small\thmname{#1}\thmnumber{\@ifnotempty{#1}{}\@upn{#2}}% Theorem text (e.g. Theorem 2.1)
+%{\small\thmname{#1}% Theorem text (e.g. Theorem)
+\thmnote{\nobreakspace\the\thm@notefont\normalfont\bfseries---\nobreakspace#3}}% Optional theorem note
+
+\newcounter{dummy}
+\numberwithin{dummy}{chapter}
+
+\theoremstyle{blacknumbox}
+\newtheorem{definitionT}[dummy]{Definition}
+\newtheorem{theoremT}[dummy]{Theorem}
+\newtheorem{corollaryT}[dummy]{Corollary}
+\newtheorem{lemmaT}[dummy]{Lemma}
+
+% In the unnumbered style the \nobreakspace right after {\small\thmname{#1} is removed, because otherwise there would be a space between the theorem name and the "."; the space is wanted only in the numbered style, to separate the name from the number, e.g. "(2.1)"
+\theoremstyle{unnumbered}
+\newtheorem*{remarkT}{Remark}
+\newtheorem*{proofT}{Proof}
+\newtheorem*{exampleT}{Example}
+
+\RequirePackage[framemethod=default]{mdframed} % Required for creating the theorem, definition, exercise and corollary boxes
+
+% orange box
+\newmdenv[skipabove=7pt,
+skipbelow=7pt,
+rightline=false,
+leftline=true,
+topline=false,
+bottomline=false,
+linecolor=orange,
+backgroundcolor=orange!0,
+innerleftmargin=5pt,
+innerrightmargin=5pt,
+innertopmargin=5pt,
+leftmargin=0cm,
+rightmargin=0cm,
+linewidth=2pt,
+innerbottommargin=5pt]{oBox}
+
+% green box
+\newmdenv[skipabove=7pt,
+skipbelow=7pt,
+rightline=false,
+leftline=true,
+topline=false,
+bottomline=false,
+linecolor=green,
+backgroundcolor=green!0,
+innerleftmargin=5pt,
+innerrightmargin=5pt,
+innertopmargin=5pt,
+leftmargin=0cm,
+rightmargin=0cm,
+linewidth=2pt,
+innerbottommargin=5pt]{gBox}
+
+% blue box
+\newmdenv[skipabove=7pt,
+skipbelow=7pt,
+rightline=false,
+leftline=true,
+topline=false,
+bottomline=false,
+linecolor=blue,
+backgroundcolor=blue!0,
+innerleftmargin=5pt,
+innerrightmargin=5pt,
+innertopmargin=5pt,
+leftmargin=0cm,
+rightmargin=0cm,
+linewidth=2pt,
+innerbottommargin=5pt]{bBox}
+
+% black box (used by the proof and example environments)
+\newmdenv[skipabove=7pt,
+skipbelow=7pt,
+rightline=false,
+leftline=true,
+topline=false,
+bottomline=false,
+linecolor=black,
+backgroundcolor=grey245!0,
+innerleftmargin=5pt,
+innerrightmargin=5pt,
+innertopmargin=5pt,
+leftmargin=0cm,
+rightmargin=0cm,
+linewidth=2pt,
+innerbottommargin=5pt]{blackBox}
+
+\newenvironment{defn}{\begin{bBox}\begin{definitionT}}{\end{definitionT}\end{bBox}}
+\newenvironment{thm}{\begin{gBox}\begin{theoremT}}{\end{theoremT}\end{gBox}}
+\newenvironment{coro}{\begin{oBox}\begin{corollaryT}}{\end{corollaryT}\end{oBox}}
+\newenvironment{lemma}{\begin{oBox}\begin{lemmaT}}{\end{lemmaT}\end{oBox}}
+\newenvironment{rem}{\begin{oBox}\begin{remarkT}}{\end{remarkT}\end{oBox}}
+\newenvironment{exa}{\begin{blackBox}\begin{exampleT}}{\end{exampleT}\end{blackBox}}
+
+\renewcommand\qedsymbol{$\blacksquare$}
+\renewenvironment{proof}{\begin{blackBox}\begin{proofT}}{\[\qed\]\end{proofT}\end{blackBox}}
+
+%---------------------------
+% CONTENTS
+%---------------------------
+
+\setcounter{secnumdepth}{3} % \subsubsection is level 3
+\setcounter{tocdepth}{2}
+
+\usepackage{bookmark}% loads hyperref too
+    \hypersetup{
+        %pdftitle={Fundamentos de C\'alculo},
+        %pdfsubject={C\'alculo diferencial},
+        bookmarksnumbered=true,
+        bookmarksopen=true,
+        bookmarksopenlevel=1,
+        hidelinks,% remove border and color
+        pdfstartview=Fit, % Fits the page to the window.
+        pdfpagemode=UseOutlines, % Determines how the file opens in Acrobat; the possibilities are UseNone, UseThumbs (show thumbnails), UseOutlines (show bookmarks), FullScreen, UseOC (PDF 1.5), and UseAttachments (PDF 1.6). If no mode is explicitly chosen, but the bookmarks option is set, UseOutlines is used.
+    }
+
+\usepackage{glossaries} % certain packages that must be loaded before glossaries, if they are required: hyperref, babel, polyglossia, inputenc and fontenc
+\setacronymstyle{long-short}
+
+% hide section from the ToC \tocless\section{hide}
+\newcommand{\nocontentsline}[3]{}
+\newcommand{\tocless}[2]{\bgroup\let\addcontentsline=\nocontentsline#1{#2}\egroup}
+
+\usepackage[textsize=tiny, textwidth=1.5cm]{todonotes} % add disable to options to not show in pdf