added a few details to the notes for 9.7 #1

Open · wants to merge 1 commit into base: master
63 changes: 34 additions & 29 deletions notes.tex
@@ -276,20 +276,27 @@ \part*{September 7}
$$ \Sigma_y = \Sigma_x + \sigma^2 I $$

$$ \Sigma_x \rightarrow \Sigma_y $$
In this case, the eigenvectors of the two matrices are the same, and the eigenvalues differ only by $\sigma^2$.

$$ S_y = \frac{1}{n} \sum_i (x_i + z_i ) (x_i + z_i)^T = S_x \text{``+''} S_z $$
In this case, though, the top few eigenvectors are only ``consistent'', i.e.\ they have positive correlation with the population eigenvectors, and the eigenvalues increase.

Think of $X \in \mathbb{R}^{p \times n} = U \pmb{1}^T = [U \cdots U]$.
In class we saw a demo showing the eigenvalue distribution of $S_y$, where $x_i = \alpha_i u$, $\alpha_i \sim N(0,1)$, and $z_i$ is also normal.
% Think of $X \in \mathbb{R}^{p \times n} = U \pmb{1}^T = [U \cdots U]$.
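A minimal sketch of what such a demo could look like (the sizes $p = 100$, $n = 500$ and the NumPy implementation are assumptions for illustration, not from the lecture):

\begin{verbatim}
import numpy as np

# Spiked model from the class demo: x_i = alpha_i * u is a rank-one signal,
# z_i ~ N(0, I_p) is noise, and y_i = x_i + z_i.
p, n = 100, 500                              # assumed sizes
rng = np.random.default_rng(0)

u = rng.standard_normal(p)
u = u / np.linalg.norm(u)                    # fixed unit signal direction
alpha = rng.standard_normal(n)               # alpha_i ~ N(0, 1)
X = np.outer(u, alpha)                       # columns x_i = alpha_i * u
Z = rng.standard_normal((p, n))              # columns z_i
Y = X + Z                                    # y_i = x_i + z_i

S_y = (Y @ Y.T) / n                          # sample covariance of the y_i
eigvals, eigvecs = np.linalg.eigh(S_y)       # eigenvalues in ascending order
print(eigvals[-5:])                          # top eigenvalue should separate from the noise bulk
print(abs(u @ eigvecs[:, -1]))               # positive but < 1: only a "consistent" estimate of u
\end{verbatim}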

\textbf{Null case}
\textbf{Null case: consider only $z$}

$$ S = \frac{1}{n} \sum_i z_i z_i^T , z_i \sim N(0, I_p) $$

Question: what is eig($S$) like?

\textbf{Marcenko-Pastur Law '67}: $ P, n \rightarrow + \infty, P/n \rightarrow \gamma > 0 $. Distribution of eigenvalues of $S$ converges to limiting density
\textbf{(White) Marcenko-Pastur Law '67}: as $p, n \rightarrow + \infty$ with $p/n \rightarrow \gamma > 0$, the distribution of the eigenvalues of $S$ converges to a limiting density:\\
When $\gamma \leq 1$,

$$p_{MP} (t) = \frac{\sqrt{(t-a)(b-t)}}{2\pi \gamma t}, a < t < b, a = (1-\sqrt{\gamma})^2 , b = (1+\sqrt{\gamma})^2$$
When $\gamma > 1$, the limiting law is instead
$$p_{MP} (t) + (1-\frac{1}{\gamma})\delta_0(t).$$
This is because when $p > n$, at least $p-n$ of the eigenvalues are 0.
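A quick numerical illustration of the null case (the sizes $p = 500$, $n = 1000$, i.e.\ $\gamma = 1/2$, are assumed):

\begin{verbatim}
import numpy as np

# Null case: S = (1/n) Z Z^T with i.i.d. N(0, 1) entries; compare eig(S) to p_MP.
p, n = 500, 1000                             # assumed sizes, gamma = 0.5
gamma = p / n
rng = np.random.default_rng(1)

Z = rng.standard_normal((p, n))
S = (Z @ Z.T) / n
eigvals = np.linalg.eigvalsh(S)

a = (1 - np.sqrt(gamma)) ** 2                # left edge of the MP support
b = (1 + np.sqrt(gamma)) ** 2                # right edge of the MP support
t = np.linspace(a + 1e-6, b - 1e-6, 400)
p_mp = np.sqrt((t - a) * (b - t)) / (2 * np.pi * gamma * t)

print(eigvals.min(), a)                      # smallest eigenvalue vs. left edge a
print(eigvals.max(), b)                      # largest eigenvalue vs. right edge b
print(np.trapz(p_mp, t))                     # the density integrates to about 1 (gamma <= 1)
\end{verbatim}

The printed extreme eigenvalues should already be close to the edges $a$ and $b$.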

The mathematical formulation:

@@ -299,38 +306,35 @@ \part*{September 7}

for the eigenvalues $\lambda_1 , \cdots, \lambda_p$ of $S$

The convergence of $ESD_S(S) \rightarrow^w p_{MP} (t)$ almost surely
The convergence $ESD_S \rightarrow p_{MP} (t)$ holds weakly, almost surely.

Remark: when $\gamma \rightarrow 0^+$, this density converges to $\delta_1$ (the population case). When $\gamma \rightarrow +\infty$, this density is roughly a semicircle centered at $\gamma$.
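A quick check of the first limit, directly from the edges of the support: as $\gamma \rightarrow 0^+$,
$$ a = (1-\sqrt{\gamma})^2 \rightarrow 1, \qquad b = (1+\sqrt{\gamma})^2 \rightarrow 1, $$
so the support $[a, b]$ shrinks to the single point $t = 1$ and the limit is indeed $\delta_1$, matching the population covariance $I_p$ whose eigenvalues are all $1$.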


Remark: Convergence to the limiting density is \emph{fast}:

$$ p , \sim 10^2 \text{ , theapproximation is OK} $$
already for $p \sim 10^2$, the approximation is OK.

Remark: ``White MP''

$$ z_i \sim N(0, I_p) $$

$$ \text{ when } z_i \sim N(0, \Sigma), \Sigma \neq I $$

``Colored MP''

Limiting distribution of eig($\Sigma$), $\frac{1}{2}d_1 + \frac{1}{2} d_2$
``Colored MP''
$$ \text{ when } z_i \sim N(0, \Sigma), \Sigma \neq I $$
For example, if $\Sigma$ has two eigenvalues $d_1, d_2$ (with equal multiplicity), the limiting distribution of eig($S$) when $p/n \rightarrow 0$ is like $\frac{1}{2}\delta_{d_1} + \frac{1}{2} \delta_{d_2}$.
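A numerical sketch of this example (the sizes $p = 60$, $n = 6000$ and the equal multiplicities are assumptions for illustration):

\begin{verbatim}
import numpy as np

# Colored MP example: Sigma has two eigenvalues d1, d2 (equal multiplicity assumed),
# and p/n is small, so eig(S) should cluster near d1 and d2.
p, n = 60, 6000                              # assumed sizes, p/n = 0.01
d1, d2 = 1.0, 4.0
rng = np.random.default_rng(2)

sqrt_sigma = np.sqrt(np.concatenate([np.full(p // 2, d1), np.full(p // 2, d2)]))
Z = sqrt_sigma[:, None] * rng.standard_normal((p, n))    # z_i ~ N(0, Sigma), Sigma diagonal
S = (Z @ Z.T) / n
eigvals = np.linalg.eigvalsh(S)
print(np.round(eigvals, 2))                  # about half near d1 = 1, half near d2 = 4
\end{verbatim}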

\section*{Proof of Marcenko-Pastur Density}

Recall first the proof of the CLT: we show that the characteristic function of $\frac{1}{\sqrt{n}}\sum_{i=1}^n x_i$ converges to that of the normal distribution.
$$ \mathbb{E}_{x \sim p} e^{i \xi x} = \phi(\xi) $$

$$ \mathbb{E} \exp\Big[i \xi \frac{1}{\sqrt{n}} (x_1 + \cdots + x_n)\Big] = \prod_i \mathbb{E} \exp\Big[i \xi \frac{x_i}{\sqrt{n}}\Big] \xrightarrow{\text{Taylor series approximation}} \phi(\xi)_{\text{Gaussian}} $$
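Spelling out the Taylor step (assuming, as is implicit in the normalization, that the $x_i$ are i.i.d.\ with mean $0$ and variance $1$):
$$ \mathbb{E} \exp\Big[\frac{i \xi x_i}{\sqrt{n}}\Big] = 1 - \frac{\xi^2}{2n} + o\Big(\frac{1}{n}\Big), \qquad \Big(1 - \frac{\xi^2}{2n} + o\big(\tfrac{1}{n}\big)\Big)^{n} \rightarrow e^{-\xi^2/2}, $$
which is the characteristic function of $N(0,1)$.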

Stieltjes Transform of $\mu$: $p(t)$ probabilistic density $d \mu(t) = p(t) dt$

$m_\mu(\xi) = \int_{-\infty}^{+\infty} \frac{1}{t-\xi} d \mu(t), I_m(\xi) > 0$

Lemma 1: A sequence of probability measures $\mu_n \rightarrow \mu$ if and only if $m_n(\xi) \rightarrow m(\xi), \forall \xi, Im(z) > 0$, $m_n(\xi)$ is the Stieltjes Transform of $\mu_n$.

Lemma 2: MP Equation: consider $m(\xi)$, the Stieltjes Transform of the MP density. Then $ \xi + \frac{1}{m(\xi)} = \frac{1}{1+\delta m(\xi)}$ (*)
Similarly, here we consider the Stieltjes Transform of $\mu$ (a probability measure with density $p(t)$, $d \mu(t) = p(t)\, dt$): $m_\mu(z) = \int_{-\infty}^{+\infty} \frac{1}{t-z}\, d \mu(t)$ for $Im(z) > 0$, and we prove the convergence in the sense of Stieltjes Transforms.\\
\\
Lemma 1: A sequence of probability measures $\mu_n \rightarrow \mu$ weakly if and only if $m_n(z) \rightarrow m(z)$ for all $z$ with $Im(z) > 0$, where $m_n(z)$ is the Stieltjes Transform of $\mu_n$ and $m(z)$ that of $\mu$.\\
Lemma 2 (MP Equation): consider $m(z)$, the Stieltjes Transform of the MP density. Then $ z + \frac{1}{m(z)} = \frac{1}{1+\gamma m(z)}$ (*)

Proof of Lemma 2:

@@ -340,40 +344,41 @@ \section*{Proof of Marcenko-Pastur Density}

The solution of (*)

$$ m(\xi) = \frac{-(\xi + \gamma -1) \pm \sqrt{ \cdots}}{2 \xi \delta} $$
$$ m(z) = \frac{-(z+ \gamma -1) \pm \sqrt{ \cdots}}{2 \gamma z} $$
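To see where this formula comes from: clearing denominators in (*) gives a quadratic equation in $m(z)$,
$$ \gamma z \, m(z)^2 + (z + \gamma - 1)\, m(z) + 1 = 0, $$
and the quadratic formula gives the expression above.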

Verify that $Im(m(t+ib)) \rightarrow \pi \cdot p_{MP}(t)$ as $b \rightarrow 0^+$.
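This verification works because of the Stieltjes inversion formula: with the sign convention above ($Im(z) > 0$),
$$ p(t) = \frac{1}{\pi} \lim_{b \rightarrow 0^+} Im \, m_\mu(t + i b), $$
so computing the imaginary part of $m$ near the real axis recovers the density.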

Back to the main theorem:

It suffices to show that $m_n (\xi)$ on the limit of $n, p \rightarrow \infty, \cdots$ satisfies (*):
It suffices to show that $m_n (z)$, in the limit $n, p \rightarrow \infty, \cdots$, satisfies (*):


$$ m_n(\xi) = \int_\mathbb{R} \frac{1}{t-\xi} ESD_{s}(t) dt $$
$$ m_n(z) = \int_\mathbb{R} \frac{1}{t-z} \, ESD_S(t) \, dt $$

$$ \frac{1}{p} \sum_{i=1}^p \frac{1}{\lambda_i - \xi} = \frac{1}{p} Tr(S - \xi I)^{-1} $$
$$ \frac{1}{p} \sum_{i=1}^p \frac{1}{\lambda_i - z} = \frac{1}{p} Tr(S - z I)^{-1} $$

Recall that $S = \frac{1}{n} \sum_{i} \xi_i \xi_i^T = \sum_{i=1}^n x_i x_i^T, x_i = \frac{\xi_i}{\sqrt{n}}$
Recall that $S = \frac{1}{n} \sum_{i} z_i z_i^T = \sum_{i=1}^n x_i x_i^T$ with $x_i = \frac{z_i}{\sqrt{n}}$; write $B = S$ (the notation changes from $S$ to $B$ below, but they are the same matrix).

Identity:

$$ I + \xi(B-\xi I)^{-1} B(B- \xi I)^{-1} $$
$$ I + z(B-z I)^{-1}= B(B- z I)^{-1} $$

``$I + \frac{\xi}{B-\xi} = \frac{B}{B-\xi_i}$''
``$I + \frac{z}{B-z} = \frac{B}{B-z}$''
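The identity follows from writing $B = (B - zI) + zI$:
$$ B(B - zI)^{-1} = \big( (B - zI) + zI \big)(B - zI)^{-1} = I + z (B - zI)^{-1}. $$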

$\frac{1}{p} Tr(\cdot)$ both sides:
Apply $\frac{1}{p} Tr(\cdot)$ to both sides:

$$ 1 + \xi \frac{1}{p} Tr(B - \xi)^{-1} = \frac{1}{p} Tr(B (B- \xi)^{-1}) $$
$$ 1 + z \frac{1}{p} Tr(B - z)^{-1} = \frac{1}{p} Tr(B (B- z)^{-1}) $$

$$ = \frac{1}{p} \sum_i Tr(x_i x_i^T (B- \xi I)^{-1}) $$
$$ = \frac{1}{p} \sum_i Tr(x_i x_i^T (B- z I)^{-1}) = \frac{1}{p} \sum_i x_i^T(B - z I)^{-1} x_i $$

$$ x_i^T(B - \xi I)^{-1} x_i = x_i^T(x_ix_i^T + B_{(i)} + B_{(i)} - \xi I)^{-1} x_i $$
Consider
$$ x_i^T(B - z I)^{-1} x_i = x_i^T(x_ix_i^T + B_{(i)} - z I)^{-1} x_i $$

$$ B = \sum_{j \neq i} x_j x_j^T + x_i x_i^T $$
$$ B_{(i)} = \sum_{j \neq i} x_j x_j^T $$

Using Sherman-Morrison:

$$ \frac{\gamma m_n(\xi)}{1+\gamma m(\xi)} $$
$$ x_i^T(B - z I)^{-1} x_i \approx \frac{\gamma m_n(z)}{1+\gamma m_n(z)} $$
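A sketch of how this expression arises (filling in the omitted steps; the concentration step is heuristic at this level of the notes). Sherman-Morrison, applied with $A = B_{(i)} - zI$ and the rank-one update $x_i x_i^T$, gives
$$ x_i^T \big( B_{(i)} + x_i x_i^T - zI \big)^{-1} x_i = \frac{x_i^T (B_{(i)} - zI)^{-1} x_i}{1 + x_i^T (B_{(i)} - zI)^{-1} x_i}, $$
and by concentration of the quadratic form
$$ x_i^T (B_{(i)} - zI)^{-1} x_i \approx \frac{1}{n} Tr\big(B_{(i)} - zI\big)^{-1} \approx \frac{1}{n} Tr (B - zI)^{-1} = \frac{p}{n} \cdot \frac{1}{p} Tr (B - zI)^{-1} \approx \gamma\, m_n(z), $$
which gives the ratio $\frac{\gamma m_n(z)}{1 + \gamma m_n(z)}$ above.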

Look at the reference book by Terry Tao on random matrices; search for ``introduction to random matrix theory''.
