Ng_DLMooc_c2wk1.html

<!DOCTYPE html>
<html lang="zh-Hant"
>
<head>
    <title>[Improving Deep Neural Networks] week1. Practical aspects of Deep Learning - mx's blog</title>
    <!-- Using the latest rendering mode for IE -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">


    <meta name="theme-color" content="#6b5594">
    <meta name="msapplication-navbutton-color" content="#6b5594">
    <meta name="apple-mobile-web-app-status-bar-style" content="#6b5594">
    <link rel="manifest" href="/manifest.json">

<link rel="canonical" href="https://x-wei.github.io/Ng_DLMooc_c2wk1.html">

        <meta name="author" content="mx" />
        <meta name="keywords" content="deep learning" />
        <meta name="description" content="Setting up your Machine Learning Application Train / Dev / Test sets Applied ML: highly iterative process. idea-code-exp loop splitting data splitting data in order to speed up the idea-code-exp loop: training set / dev (hold-out/cross-validation) set / test set split ratio: with 100~10000 examples: 70/30 or 60/20/20 with ..." />

        <meta property="og:site_name" content="mx's blog" />
        <meta property="og:type" content="article"/>
        <meta property="og:title" content="[Improving Deep Neural Networks] week1. Practical aspects of Deep Learning"/>
        <meta property="og:url" content="https://x-wei.github.io/Ng_DLMooc_c2wk1.html"/>
        <meta property="og:description" content="Setting up your Machine Learning Application Train / Dev / Test sets Applied ML: highly iterative process. idea-code-exp loop splitting data splitting data in order to speed up the idea-code-exp loop: training set / dev (hold-out/cross-validation) set / test set split ratio: with 100~10000 examples: 70/30 or 60/20/20 with ..."/>
        <meta property="article:published_time" content="2017-10-21" />
            <meta property="article:section" content="notes" />
            <meta property="article:tag" content="deep learning" />
            <meta property="article:author" content="mx" />
        <meta property="og:image"
                  content="https://x-wei.github.io/Ng_DLMooc_c2wk1.png"/>


    <!-- Bootstrap -->
        <link href="https://x-wei.github.io/theme/css/bootstrap.min.css" rel="stylesheet">

    <link href="https://x-wei.github.io/theme/css/font-awesome.min.css" rel="stylesheet">

    <link href="https://x-wei.github.io/theme/css/pygments/manni.css" rel="stylesheet">
    <link href="https://x-wei.github.io/theme/tipuesearch/tipuesearch.css" rel="stylesheet">
    <link rel="stylesheet" href="https://x-wei.github.io/theme/css/style.css" type="text/css"/>

        <link href="https://x-wei.github.io/feeds/atom.xml" type="application/atom+xml" rel="alternate"
              title="mx's blog ATOM Feed"/>

        <link href="https://x-wei.github.io/theme/css/material.min.css" rel="stylesheet">
        <link href="https://x-wei.github.io/theme/css/ripples.css" rel="stylesheet">
</head>
<body>
<div style="display:none" id="title">[Improving Deep Neural Networks] week1. Practical aspects of Deep Learning - mx's blog</div>

<div class="navbar navbar-default navbar-fixed-top" role="navigation">
    <div class="container">
         <div class="navbar-header">
            <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-ex1-collapse">
                <span class="sr-only">切换导航</span>
                <span class="icon-bar"></span>
                <span class="icon-bar"></span>
                <span class="icon-bar"></span>
            </button>
            <a href="https://x-wei.github.io/" class="navbar-brand">
mx's blog            </a>
        </div>
        <div class="collapse navbar-collapse navbar-ex1-collapse">
            <ul class="nav navbar-nav">


                    <li class="dropdown hidden-md hidden-lg hidden-xl">
                        <a class="dropdown-toggle" data-toggle="dropdown" href="javascript:void(0)">
                            <i class="fa fa-user"></i><span class="caret"></span>
                        </a>
                        <ul class="dropdown-menu">
                        </ul>
                    </li>

                  <ul class="nav navbar-nav hidden-xs hidden-sm">
                  </ul>
            </ul>

                <ul class="nav navbar-nav hidden-md hidden-lg hidden-xl">
                    <li class="dropdown hidden-md hidden-lg hidden-xl">
                        <a class="dropdown-toggle" data-toggle="dropdown" href="javascript:void(0)">
                            <i class="fa fa-folder-o"></i><span class="caret"></span>
                        </a>
                        <ul class="dropdown-menu">
                            <li >
                                <a href="https://x-wei.github.io/category/misc.html"><i class="fa fa-folder-o"></i> Misc</a>
                            </li>
                            <li >
                                <a href="https://x-wei.github.io/category/music.html"><i class="fa fa-folder-o"></i> Music</a>
                            </li>
                            <li class="active">
                                <a href="https://x-wei.github.io/category/notes.html"><i class="fa fa-folder-o"></i> Notes</a>
                            </li>
                            <li >
                                <a href="https://x-wei.github.io/category/soft.html"><i class="fa fa-folder-o"></i> Soft</a>
                            </li>
                            <li >
                                <a href="https://x-wei.github.io/category/tech.html"><i class="fa fa-folder-o"></i> Tech</a>
                            </li>
                        </ul>
                    </li>
                </ul>

                    <ul class="nav navbar-nav hidden-xs hidden-sm">
                        <li >
                            <a href="https://x-wei.github.io/category/misc.html"><i class="fa fa-folder-o"></i> Misc</a>
                        </li>
                        <li >
                            <a href="https://x-wei.github.io/category/music.html"><i class="fa fa-folder-o"></i> Music</a>
                        </li>
                        <li class="active">
                            <a href="https://x-wei.github.io/category/notes.html"><i class="fa fa-folder-o"></i> Notes</a>
                        </li>
                        <li >
                            <a href="https://x-wei.github.io/category/soft.html"><i class="fa fa-folder-o"></i> Soft</a>
                        </li>
                        <li >
                            <a href="https://x-wei.github.io/category/tech.html"><i class="fa fa-folder-o"></i> Tech</a>
                        </li>
                    </ul>


            <ul class="nav navbar-nav navbar-right hidden-sm hidden-md hidden-lg hidden-xl">
                <li class="dropdown hidden-md hidden-lg hidden-xl">
                    <a class="dropdown-toggle" data-toggle="dropdown" href="javascript:void(0)">
                        <i class="fa fa-search"></i><span class="caret"></span>
                    </a>
                    <ul class="dropdown-menu">
                      <li><span>
                        <form class="navbar-search" action="/search.html">
                          <input type="text" class="search-query form-control col-lg-16" placeholder="Search" name="q" id="tipue_search_input" required>
                        </form></span>
                      </li>
                    </ul>
                </li>
            </ul>

            <ul class="nav navbar-right navbar-form hidden-xs">
              <li><span>
                <form class="navbar-search" action="/search.html">
                  <input type="text" class="search-query form-control col-lg-16" placeholder="查找" name="q" id="tipue_search_input" required>
                </form></span>
              </li>
            </ul>

            <ul class="nav navbar-nav navbar-right hidden-xs">
              <li><a href="https://x-wei.github.io/archives.html"><i class="fa fa-th-list"></i><span class="icon-label">Archive</span></a></li>
            </ul>
        </div>
        <!-- /.navbar-collapse -->
    </div>
</div> <!-- /.navbar -->
<!-- Banner -->
<!-- End Banner -->
<div class="container" style="min-height: 100%;height: auto !important;height: 100%;">
    <div class="row" style="padding-bottom:80px;padding-top:80px;">
        <div class="col-xl-21 col-lg-20 col-md-18">
            <div id="loading-block">
            <ol class="breadcrumb">
                <li><a href="https://x-wei.github.io/" title="mx's blog"><i class="fa fa-home fa-lg"></i></a></li>
                <li><a href="https://x-wei.github.io/category/notes.html" title="notes">notes</a></li>
                <li class="active">[Improving Deep Neural Networks] week1. Practical aspects of Deep Learning</li>
            </ol>
    <section id="content" class="article-content">
        <article>
            <header class="page-header jumbotron jumbotron-primary panel-primary" id="article-header">
                <div class="panel-heading">
                    <h1>
                        [Improving Deep Neural Networks] week1. Practical aspects of Deep Learning
                        <a href="https://x-wei.github.io/Ng_DLMooc_c2wk1.html"
                           rel="bookmark"
                           class="btn btn-primary btn-lg"
                           title="到 [Improving Deep Neural Networks] week1. Practical aspects of Deep Learning 的永久链接">
                           <i class="mdi-action-launch"></i>
                        </a>
                    </h1>
                </div>
                <div class="panel-body">
<div class="post-info">
    <span class="published">
        <time datetime="2017-10-21T00:00:00+02:00"><i class="fa fa-calendar"></i> Sat, 21 Oct 2017</time>
    </span>


<span class="btn-group">
	<a href="https://x-wei.github.io/tag/deep-learning.html" class="btn btn-primary btn-xs"><i class="fa fa-tag"></i> deep learning</a>
</span>
    
            <span class="label label-default">Series</span>
            Part 5 of «Andrew Ng Deep Learning MOOC» 
    

</div><!-- /.post-info -->                </div>
            </header>

            <div class="entry-content jumbotron" id="article-content">
                    <div class="panel panel-default">
                        <div class="panel-heading">
                        目录
                        </div>
                        <div class="panel-body">
                        <div id="toc"><ul><li><a class="toc-href" href="#setting-up-your-maching-learning-application" title="Setting up your Machine Learning Application">Setting up your Machine Learning Application</a><ul><li><a class="toc-href" href="#train-dev-test-sets" title="Train / Dev / Test sets">Train / Dev / Test sets</a></li><li><a class="toc-href" href="#bias-variance" title="Bias / Variance">Bias / Variance</a></li><li><a class="toc-href" href="#basic-recipe-for-machine-learning" title="Basic Recipe for Machine Learning">Basic Recipe for Machine Learning</a></li></ul></li><li><a class="toc-href" href="#regularizing-your-neural-network_1" title="Regularizing your neural network">Regularizing your neural network</a><ul><li><a class="toc-href" href="#regularization" title="Regularization">Regularization</a><ul><li><a class="toc-href" href="#example-logistic-regression" title="example: logistic regression">example: logistic regression</a></li><li><a class="toc-href" href="#example-nn" title="example: NN">example: NN</a></li></ul></li><li><a class="toc-href" href="#why-regularization-reduces-overfitting_1" title="Why regularization reduces overfitting?">Why regularization reduces overfitting?</a></li><li><a class="toc-href" href="#dropout-regularization" title="Dropout Regularization">Dropout Regularization</a><ul><li><a class="toc-href" href="#dropout-implementation-inverted-dropout" title='dropout implementation: "inverted dropout"'>dropout implementation: "inverted dropout"</a></li></ul></li><li><a class="toc-href" href="#understanding-dropout_1" title="Understanding Dropout">Understanding Dropout</a></li><li><a class="toc-href" href="#other-regularization-methods" title="Other regularization methods">Other regularization methods</a><ul><li><a class="toc-href" href="#data-augmentation" title="data augmentation">data augmentation</a></li><li><a class="toc-href" href="#early-stopping" title="early stopping">early 
stopping</a></li></ul></li></ul></li><li><a class="toc-href" href="#setting-up-your-optimization-problem_2" title="Setting up your optimization problem">Setting up your optimization problem</a><ul><li><a class="toc-href" href="#normalizing-inputs" title="Normalizing inputs">Normalizing inputs</a><ul><li><a class="toc-href" href="#vanishing-exploding-gradients" title="Vanishing / Exploding gradients">Vanishing / Exploding gradients</a></li></ul></li><li><a class="toc-href" href="#weight-initialization-for-deep-networks_1" title="Weight Initialization for Deep Networks">Weight Initialization for Deep Networks</a></li><li><a class="toc-href" href="#numerical-approximation-of-gradients" title="Numerical approximation of gradients">Numerical approximation of gradients</a></li><li><a class="toc-href" href="#gradient-checking" title="Gradient checking">Gradient checking</a></li><li><a class="toc-href" href="#gradient-checking-implementation-notes" title="Gradient Checking Implementation Notes">Gradient Checking Implementation Notes</a></li></ul></li></ul></div>
                        </div>
                    </div>
                <h1 id="setting-up-your-maching-learning-application">Setting up your Machine Learning Application</h1>
<h2 id="train-dev-test-sets">Train / Dev / Test sets</h2>
<p>Applied ML: highly iterative process. <em>idea-code-exp loop</em> </p>
<p><strong>splitting data</strong><br/>
splitting data in order to speed up the idea-code-exp loop:  <br/>
<em>training set / dev (hold-out/cross-validation) set / test set</em>  </p>
<p><strong>split ratio</strong>:  </p>
<ul>
<li>with 100~10000 examples: 70/30 or 60/20/20  </li>
<li>with ~1M examples: dev/test set can have much smaller ratio, e.g. 98/1/1  </li>
</ul>
<p><strong>mismatched train/test distribution</strong><br/>
training and test set don't come from the same dist.<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image.png"/> </p>
<ul>
<li>rule of thumb: <strong>make sure</strong> <strong>dev and test set come from the same distribution.</strong> </li>
<li>might be OK to only have dev set. — though in this case there is no longer an unbiased estimate of performance.  </li>
</ul>
<h2 id="bias-variance">Bias / Variance</h2>
<ul>
<li>high variance: <em>overfitting</em> </li>
<li>high bias: <em>underfitting</em> </li>
</ul>
<p><img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image001.png"/><br/>
high bias and high variance (worst case): high bias in some regions and high variance elsewhere<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image004.png"/> </p>
<p><strong>how to estimate bias&amp;variance</strong><br/>
→ <em>look at train and dev set error</em> </p>
<ul>
<li>high variance: Err_train &lt;&lt; Err_dev — not generalize well  </li>
<li>high bias: Err_train ~= Err_dev, and Err_train &gt;&gt; Err_human — not learning well even on training set  </li>
<li>high bias <em>and</em> high variance (worst): Err_train &gt;&gt; Err_human, Err_train &lt;&lt; Err_dev  </li>
</ul>
<p><img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image003.png"/> </p>
<h2 id="basic-recipe-for-machine-learning">Basic Recipe for Machine Learning</h2>
<p>basic recipe:  </p>
<ol>
<li>does algo have high bias ? (look at Err_train)  <ul>
<li>if yes → try bigger nn / other architecture  </li>
<li>until having low bias (fit well training set)  </li>
</ul>
</li>
<li>high variance ? (look at Err_dev)  <ul>
<li>if yes → get more data / regularization / other architecture  </li>
</ul>
</li>
</ol>
<p><img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image005.png"/> </p>
<p><strong>bias-variance tradeoff</strong> </p>
<ul>
<li>in pre-DL era, bias and variance are tradeoff (decrease one → increase the other)  </li>
<li>in DL era: <em>if getting bigger nn and more data always possible</em>, <em>both can be reduced</em> </li>
</ul>
<p>(when well regularized,) <em>"training a bigger NN almost never hurts."</em> </p>
<h1 id="regularizing-your-neural-network_1">Regularizing your neural network</h1>
<p>2 ways to reduce variance: regularize, or get more data.  </p>
<h2 id="regularization">Regularization</h2>
<h3 id="example-logistic-regression">example: logistic regression</h3>
<ul>
<li>params: <code>w</code>, <code>b</code> </li>
<li>cost function <code>J(w,b) = 1/m * sum_i L(yhat_i, yi)</code> </li>
</ul>
<p>→ add one more term to cost <code>J</code>: adding L2 norm of <code>w</code>(<em>L2 regularization</em>)<br/>
(lambda: regularization param)<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image006.png"/><br/>
<em>just omit regularizing b</em>: <code>w</code> is high dim, <code>b</code> is single number.  </p>
<p>L1 regularization: L1 norm of <code>w</code> → <em>w will be sparse → </em>compressing the model (just a little bit)<br/>
⇒ <em>L2-reg is much more often used</em> </p>
<h3 id="example-nn">example: NN</h3>
<ul>
<li>params: <code>w[l]</code>, <code>b[l]</code> for l = 1..N  </li>
<li>sum of the norms of each <code>w[l]</code> matrix.  </li>
</ul>
<p>⇒ squared <em>"Frobenius norm"</em> of a matrix: sum (each element squared)<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image007.png"/> </p>
<p><strong>gradient descent</strong>: adding one more term from backprop<br/>
d/dw (lambda/2m * ||w||^2) = lambda/m * w <br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image008.png"/> </p>
<p>L2-reg also called "<strong>weight decay</strong>": <br/>
with L2-reg, looks as if doing the backprop updating, with w being w' = (1-alpha*lambda/m) * w (decayed w)<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image009.png"/> </p>
<h2 id="why-regularization-reduces-overfitting_1">Why regularization reduces overfitting?</h2>
<p><em>why imposing small params prevents overfitting?</em> </p>
<p><strong>intuition 1</strong><br/>
→ heavy regularization <br/>
→ weight ~= 0 <br/>
→ many hidden units' impacts are "<em>zeroed-out</em>"<br/>
→ simpler NN<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image010.png"/> </p>
<p><strong>intuition 2</strong><br/>
e.g. activation g(z) = tanh(z)<br/>
small z → g(z) ~= linear, <br/>
large z → g(z) flattened<br/>
⇒ large lambda → small w <br/>
→ z small <br/>
→ every layer ~linear<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image011.png"/> </p>
<h2 id="dropout-regularization">Dropout Regularization</h2>
<p>another powerful method of regularization<br/>
<strong>dropout</strong>: <em>For each training example</em>, in each layer, <em>eliminate randomly some of its output values.</em> <br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image012.png"/> <img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image013.png"/> </p>
<h3 id="dropout-implementation-inverted-dropout">dropout implementation: "inverted dropout"</h3>
<p>example: dropout of <em>layer 3</em>, keep_prob = 0.8 (prob of keeping hidden unit)<br/>
→ generate a random matrix of the same shape as activation <code>a[3]</code> </p>
<div class="highlight"><pre><span class="code-line"><span></span><span class="err">d3 = np.random.rand(a3.shape[0], a3.shape[1]) &lt; keep_prob  # d3 is bool matrix  </span></span>
<span class="code-line"><span class="err">a3 = np.multiply(a3, d3)  # element-wise multiply  </span></span>
<span class="code-line"><span class="err">a3 /= keep_prob  # ****"inverted dropout"****</span></span>
</pre></div>
<p><strong>"inverted dropout": why a3 /= keep_prob (i.e. make a3 larger)?</strong> </p>
<ul>
<li>let's say layer 3 has 50 units, keep_prob = 0.8   </li>
<li>→ ~10 units shut off  </li>
<li><code>z[4] = w[4] * a[3] + b[4]</code> </li>
</ul>
<p>⇒ a[3] have random 20% units shut off <br/>
→ <em>w[4]*a[3] will be reduced by 20% in expectation</em>  </p>
<ul>
<li>inverted dropout: a3 /= keep_prob, to <em>keep the expected value of a3 unchanged</em>.  </li>
<li>(No dropout at test time) → inverted dropout <em>avoids scaling problem at test time</em> </li>
</ul>
<p><strong>making predictions at test time</strong><br/>
Do NOT use dropout at test time ⇒ don't want output to be random at test time...  </p>
<h2 id="understanding-dropout_1">Understanding Dropout</h2>
<p><em>why randomly shut units prevents overfitting ?</em> </p>
<p><strong>Intuition: can't rely on any one input feature → have to spread out weight</strong><br/>
spread weights ~→ smaller L2 norm (shrink weights)<br/>
Can be formally proven: dropout is equal to <em>adaptive</em> L2-reg, with penalty of different weight being different.  </p>
<p>For one hidden unit: any of its input features (from prev layer) can go out at random<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image014.png"/> </p>
<p><strong>Implementation details</strong> </p>
<ul>
<li>vary keep_prob for different layer   </li>
</ul>
<p>→ <em>smaller keep_prob for larger layer</em> </p>
<ul>
<li>usually no dropout (or very small dropout) for input layer...  </li>
</ul>
<p><img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image016.png"/> </p>
<p><strong>Downside of dropout</strong><br/>
cost function J <em>no longer well-defined</em> (because output yhat is random)<br/>
→ can no longer plot cost-iter curve<br/>
→ turn off dropout before plotting the curve  </p>
<h2 id="other-regularization-methods">Other regularization methods</h2>
<h3 id="data-augmentation">data augmentation</h3>
<p>adding more training example is expensive <br/>
→ vary existing training data (e.g. flipping/rand-distortions of training image for cats)  </p>
<h3 id="early-stopping">early stopping</h3>
<p>plot Err or J to #iterations <em>for both train and dev set.</em><br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image017.png"/> </p>
<p><strong>Downside of early-stopping</strong>: <br/>
<em>optimizing cost J</em> and <em>not overfitting</em> should be separate tasks ("Orthogonalization")<br/>
→ early-stopping couples the two jobs.  </p>
<p>upside of early stopping: no need to try different values of regularization param (lambda) → finds "mid-size w" at once.  </p>
<h1 id="setting-up-your-optimization-problem_2">Setting up your optimization problem</h1>
<p>How to speed up training (i.e. optimize J)  </p>
<h2 id="normalizing-inputs">Normalizing inputs</h2>
<p>normalize input:  </p>
<ol>
<li>subtract mean   </li>
<li>normalize variance  </li>
</ol>
<p><img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image018.png"/><br/>
detail: in data splitting, <em>use the same mu/sigma to normalize test set !</em> </p>
<p><strong>why normalizing input ?</strong><br/>
if features x1 x2 are on different scales → w1 and w2 not same scale<br/>
after normalizing, J is more symmetric, easier to optimize<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image019.png"/> </p>
<h3 id="vanishing-exploding-gradients">Vanishing / Exploding gradients</h3>
<p>One problem in training very deep NN: vanishing/exploding gradients.  </p>
<p>example: a very deep NN, each layer 2 units, linear activation g(z)=z, ignore bias b[l] = 0.<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image021.png"/><br/>
linear activations → y is just a linear transformation of x<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image022.png"/> </p>
<ul>
<li>assuming each w[l] = 1.5 * Identity_matrix ⇒ activations increase exponentially  </li>
<li>assuming each w[l] = 0.5 * Id ⇒ activations decrease exponentially  </li>
</ul>
<p><img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image023.png"/><br/>
yhat too large or too small → hard to train  </p>
<h2 id="weight-initialization-for-deep-networks_1">Weight Initialization for Deep Networks</h2>
<p>A partial solution of vanishing/exploding gradient problem: <em>carefully initialize weights</em>.  </p>
<p><strong>single neuron example:</strong><br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image024.png"/> </p>
<ul>
<li>y = g(w*x), g = relu  </li>
<li>n = # inputs for the neuron  </li>
</ul>
<p>z = w1*x1 + ... + wn*xn, <br/>
if wi are initialized randomly<br/>
→ large n ⇒ z will be large ! <br/>
⇒ <strong>set var(wi) = 1/n</strong> (2/n in practice for relu) to keep z in similar scale for different #inputs<br/>
initialization code: <br/>
<code>w[l] = np.random.randn(shape[l]) * np.sqrt( 2 / n[l-1] )  # n[l-1] = #inputs for layer-l</code> </p>
<p><strong>other variants</strong><br/>
when activation function g = tanh <br/>
⇒ use var(wi) = 1/n ("<strong>Xavier initialization</strong>")<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image025.png"/> </p>
<h2 id="numerical-approximation-of-gradients">Numerical approximation of gradients</h2>
<p><strong>checking the derivative computation</strong><br/>
example: f(x) = x ^ 3<br/>
→ <em>vary x by epsilon</em> to approximate f'(x), <em>use 2-sided difference</em><br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image026.png"/> </p>
<p><img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image027.png"/> </p>
<p>error order = O(epsilon^2) for 2-sided difference<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image028.png"/> </p>
<h2 id="gradient-checking">Gradient checking</h2>
<p><strong>Verify</strong> that your implementation is correct. — help finding out bugs in implementation early.  </p>
<ul>
<li>concat all params into a big vector <code>theta</code> </li>
<li>concat all dW[l] db[l] into big vector <code>d_theta</code> </li>
<li>to <strong>check if d_theta is correct</strong>: construct a <code>d_theta_approx</code> vector  </li>
</ul>
<p><img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image029.png"/><br/>
⇒ <br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image030.png"/> </p>
<p>How to check "approximate":<br/>
<img alt="" class="img-responsive" src="../images/Ng_DLMooc_c2wk1//pasted_image031.png"/> </p>
<h2 id="gradient-checking-implementation-notes">Gradient Checking Implementation Notes</h2>
<ul>
<li>Don't use checking in training: constructing d_theta_approx is slow  </li>
<li>When check fails: look at components to try to find bug  </li>
<li>Remember regularization: J contains reg term as well  </li>
<li>Doesn't work with dropout: J not well defined (random variable), turn dropout off before checking.  </li>
<li>Run check at random initialization (w,b~=0), then again after some training(w,b~&gt;0)  </li>
</ul>
            </div>
            <div class="entry-content jumbotron" id="source-content" style="display: none">
                <!-- <pre><code id="source-code">
                </code></pre> -->
                <div id="source-code"></div>
            </div>
            <!-- /.entry-content -->

            <div class="row" id="prevnext">
                <div class="col-xs-12">
                    <a href="https://x-wei.github.io/Ng_DLMooc_c1wk4.html" class="btn btn-default btn-lg" style="float:left;clear:both;background-color:#fff;">
                        <h4><i class="fa fa-arrow-left"></i>
                        [Neural Networks and Deep Learning] week4. Deep Neural Network
                        </h4>
                    </a>
                </div>
                <div class="col-xs-12">
                    <a href="https://x-wei.github.io/Ng_DLMooc_c2wk3.html" class="btn btn-default btn-lg" style="float:right;clear:both;background-color:#fff;">
                        <h4>
                        [Improving Deep Neural Networks] week3. Hyperparameter tuning, Batch Normalization and Programming Frameworks<i class="fa fa-arrow-right"></i>
                        </h4>
                    </a>
                </div>
            </div>

            <div class="panel panel-default" id="series">
                <div class="panel-heading">
                  <h4>
                  Part 5 of series «Andrew Ng Deep Learning MOOC»：
                  </h4>
                </div>
                <ul class="list-group">
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c1wk1.html' style="text-align:left">[Neural Networks and Deep Learning] week1. Introduction to deep learning</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c1wk2.html' style="text-align:left">[Neural Networks and Deep Learning] week2. Neural Networks Basics</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c1wk3.html' style="text-align:left">[Neural Networks and Deep Learning] week3. Shallow Neural Network</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c1wk4.html' style="text-align:left">[Neural Networks and Deep Learning] week4. Deep Neural Network</a></li>
                        <li class="list-group-item"><a class="btn btn-primary" href='https://x-wei.github.io/Ng_DLMooc_c2wk1.html' style="text-align:left">[Improving Deep Neural Networks] week1. Practical aspects of Deep Learning</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c2wk2.html' style="text-align:left">[Improving Deep Neural Networks] week2. Optimization algorithms</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c2wk3.html' style="text-align:left">[Improving Deep Neural Networks] week3. Hyperparameter tuning, Batch Normalization and Programming Frameworks</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c3wk1.html' style="text-align:left">[Structuring Machine Learning Projects] week1. ML Strategy (1)</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c3wk2.html' style="text-align:left">[Structuring Machine Learning Projects] week2. ML Strategy (2)</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c4wk1.html' style="text-align:left">[Convolutional Neural Networks] week1. Foundations of Convolutional Neural Networks</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c4wk2.html' style="text-align:left">[Convolutional Neural Networks] week2. Deep convolutional models: case studies</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c4wk3.html' style="text-align:left">[Convolutional Neural Networks] week3. Object detection</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c4wk4.html' style="text-align:left">[Convolutional Neural Networks] week4. Special applications: Face recognition &amp; Neural style transfer</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c5wk1.html' style="text-align:left">[Sequential Models] week1. Recurrent Neural Networks</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c5wk2.html' style="text-align:left">[Sequential Models] week2. Natural Language Processing & Word Embeddings</a></li>
                        <li class="list-group-item"><a class="btn btn-default" href='https://x-wei.github.io/Ng_DLMooc_c5wk3.html' style="text-align:left">[Sequential Models] week3. Sequence models & Attention mechanism</a></li>
                </ul>
            </div>

<!-- Comment area: a single collapsible panel that hosts the Disqus thread. -->
<section class="comments" id="comments">
    <div class="panel-group" id="accordion" role="tablist" aria-multiselectable="true">
      <div class="panel panel-primary">
        <div class="panel-heading" role="tab" id="disqus-heading">
          <h4 class="panel-title">
            <a data-toggle="collapse" data-parent="#accordion" href="#disqus-comments" aria-expanded="true" aria-controls="disqus-comments">
              <i class="fa fa-comments-o" aria-hidden="true"></i> Disqus 留言
            </a>
          </h4>
        </div>
        <!-- Expanded on load to match aria-expanded="true" on the toggle.
             NOTE(review): assumes the Bootstrap 3 collapse plugin, where "collapse in" is the shown state. -->
        <div id="disqus-comments" class="panel-collapse collapse in" role="tabpanel" aria-labelledby="disqus-heading">
          <div class="panel-body">
            <!-- No id on this wrapper: "disqus-comments" already identifies the collapsible panel and ids must be unique. -->
            <div class="tab-pane fade active in">
                <div id="disqus_thread"></div>
                <script>
                    /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
                    // These stay global `var`s on purpose: the Disqus embed script reads them by name.
                    var disqus_shortname = 'xweisblog'; // required: replace example with your forum shortname

                    var disqus_identifier = 'Ng_DLMooc_c2wk1';
                    var disqus_url = 'https://x-wei.github.io/Ng_DLMooc_c2wk1.html';

                    var disqus_config = function () {
                        this.language = "zh";
                    };

                    /* * * DON'T EDIT BELOW THIS LINE * * */
                    // Inject the Disqus embed script asynchronously so it does not block rendering.
                    (function () {
                        var dsq = document.createElement('script');
                        dsq.type = 'text/javascript';
                        dsq.async = true;
                        dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
                        (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
                    })();
                </script>
                <noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript">comments powered by
                    Disqus.</a></noscript>
                <a href="https://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
            </div>
          </div>
        </div>
      </div>
    </div>
</section>        </article>
    </section>


            </div>
<!-- Modal: image lightbox. The page script copies the clicked image's src into #mimg before showing it. -->
<!-- aria-label is used because there is no title element inside this modal for aria-labelledby to reference. -->
<div class="modal fade" id="myModal" tabindex="-1" role="dialog" aria-label="Image preview" aria-hidden="true">
    <div class="modal-dialog modal-lg">
        <div class="modal-content">
          <!-- Clicking the enlarged image dismisses the modal. -->
          <a data-dismiss="modal" href="javascript:void(0);" aria-label="Close image preview">
            <img id="mimg" src="" alt="Enlarged view of the selected image" style="width:100%;height:auto">
          </a>
        </div>
  </div><!-- /.modal-dialog -->
</div><!-- /.modal -->
        </div>
        <div class="col-xl-3 col-lg-4 col-md-6" id="sidebar">
            <aside>

<section>
    <div class="sidebar-container">
<!-- Sidebar widget: author card, social links, recommended posts, school logos and visitor counters. -->
<div class="sidebar-item ">
<div class="panel panel-default">
	<div class="panel-heading">
        <h4>
<i class="fa fa-user fa-lg" aria-hidden="true"></i>
<a href="https://x-wei.github.io/about.html">
关于  mx
</a>
        </h4>
    </div>
<div class="panel-body" id="aboutme">
        <a href="https://x-wei.github.io/about.html"><img width="100%" src="https://x-wei.github.io/images/mx.jpg" alt="mx"></a>
    
<!-- Icon-only links: aria-label supplies the accessible name the glyphs cannot. -->
<h3 style="text-align:center">
<a href="https://github.com/x-wei" target="_blank" rel="noopener" aria-label="GitHub">
<i class="fa fa-github" style="text-align:center" aria-hidden="true"></i></a>
<a href="https://weibo.com/u/1817154611" target="_blank" rel="noopener" aria-label="Weibo">
<i class="fa fa-weibo" style="text-align:center" aria-hidden="true"></i></a>
<a href="mailto:xwei.mx@gmail.com" target="_blank" rel="noopener" aria-label="Email">
<i class="mdi-communication-email" style="text-align:center" aria-hidden="true"></i></a>
</h3>

<h4 class="widget-title">推荐文章</h4>
<!-- NOTE(review): these <li> items have no <ul>/<ol> parent; left as-is because switching the wrapper to a list may change the theme's styling. Confirm against the theme CSS before fixing. -->
<div class="textwidget">
<li class="widget-container widget_text">
<a href="https://x-wei.github.io/TeXmacs_intro.html">学术文章写作利器: TeXmacs介绍</a><br></li>
<li class="widget-container widget_text">
<a href="https://x-wei.github.io/hashcode2014-solved-by-LP.html">运筹的力量: 用线性规划解决Google 2014 HashCode问题</a><br></li>
<li class="widget-container widget_text">
<a href="https://x-wei.github.io/%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F%E5%85%A5%E9%97%A8%E7%AE%80%E4%BB%8B.html">正则表达式入门简介</a><br></li>
<li class="widget-container widget_text">
<a href="https://x-wei.github.io/%E6%88%91%E7%9A%84ubuntu10.04%E9%85%8D%E7%BD%AE%E6%80%BB%E7%BB%93.html">我的ubuntu10.04配置总结</a><br></li>
<li class="widget-container widget_text">
<a href="https://x-wei.github.io/PT-summery.html">2011巴黎高科(ParisTech)申请总结</a><br></li>
<li class="widget-container widget_text">
<a href="https://x-wei.github.io/GT-summery.html">用尽量少的时间考一个够用的分数--一点Tofel/GRE备考经验</a><br></li>
<li class="widget-container widget_text">
<a href="https://x-wei.github.io/pelican_github_blog.html">用pelican在github上创建自己的博客!</a><br></li>
</div>

<br><a href="https://www.polytechnique.edu/" target="_blank" rel="noopener">
<img src="https://x-wei.github.io/images/x-logo.png" alt="X" width="180">
</a><br>

<br><a href="https://www.sjtu.edu.cn/">
<img src="https://x-wei.github.io/images/ssss.jpg" width="180" alt="上海西南某高校">
</a><br>

<br>
<h4 class="widget-title">Visitors</h4>
<script type="text/javascript" src="//rf.revolvermaps.com/0/0/1.js?i=59olkba9w7e&amp;s=220&amp;m=3&amp;v=true&amp;r=false&amp;b=000000&amp;n=false&amp;c=ff0000" async="async"></script>

<!-- hitwebcounter Code START -->
<a href="https://www.hitwebcounter.com/how-to/how-to-what-is-free-blog-counter.php" target="_blank" rel="noopener">
<img src="https://hitwebcounter.com/counter/counter.php?page=5954927&amp;style=0036&amp;nbdigits=5&amp;type=ip&amp;initCount=0" title="web counter" alt="web counter"></a>
<br>

</div>
</div>
</div>


<!-- Sidebar widget: tag cloud. Each entry links to its tag page and shows a number in <sup> (presumably the article count). -->
<div class="sidebar-item ">
  <div class="panel panel-default">
    <div class="panel-heading">
      <h4>
        <a href="https://x-wei.github.io/tags.html"><i class="fa fa-tags fa-lg"></i><span class="icon-label">标签云</span></a>
      </h4>
    </div>
    <div class="panel-body">
      <ul class="list-group list-inline tagcloud" id="tags">
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/pelican.html">pelican <sup> 6</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/google.html">google <sup> 6</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/torch.html">torch <sup> 6</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/tex.html">tex <sup> 4</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/ba-li-gao-ke.html">巴黎高科 <sup> 1</sup></a></li>
        <li class="list-group-item tag-3"><a href="https://x-wei.github.io/tag/markdown.html">markdown <sup> 2</sup></a></li>
        <li class="list-group-item tag-3"><a href="https://x-wei.github.io/tag/linux.html">linux <sup> 3</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/inf422.html">inf422 <sup> 1</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/toefl.html">Toefl <sup> 1</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/regex.html">regex <sup> 1</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/git.html">git <sup> 5</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/weka.html">weka <sup> 1</sup></a></li>
        <li class="list-group-item tag-1"><a href="https://x-wei.github.io/tag/scala.html">scala <sup> 12</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/opencv.html">opencv <sup> 1</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/scrapy.html">scrapy <sup> 1</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/gre.html">GRE <sup> 1</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/lp.html">LP <sup> 1</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/android.html">android <sup> 9</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/pandas.html">pandas <sup> 1</sup></a></li>
        <li class="list-group-item tag-3"><a href="https://x-wei.github.io/tag/codejam.html">codejam <sup> 2</sup></a></li>
        <li class="list-group-item tag-3"><a href="https://x-wei.github.io/tag/fr.html">fr <sup> 2</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/shell.html">shell <sup> 4</sup></a></li>
        <li class="list-group-item tag-1"><a href="https://x-wei.github.io/tag/python.html">python <sup> 13</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/simongarfunkel.html">Simon&Garfunkel <sup> 1</sup></a></li>
        <li class="list-group-item tag-3"><a href="https://x-wei.github.io/tag/french.html">french <sup> 2</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/wai-guan.html">外观 <sup> 4</sup></a></li>
        <li class="list-group-item tag-3"><a href="https://x-wei.github.io/tag/zhong-wen-luan-ma.html">中文乱码 <sup> 2</sup></a></li>
        <li class="list-group-item tag-3"><a href="https://x-wei.github.io/tag/host.html">host <sup> 3</sup></a></li>
        <li class="list-group-item tag-3"><a href="https://x-wei.github.io/tag/ssh.html">ssh <sup> 2</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/ubuntu.html">ubuntu <sup> 1</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/java.html">java <sup> 4</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/spark.html">spark <sup> 6</sup></a></li>
        <li class="list-group-item tag-3"><a href="https://x-wei.github.io/tag/tips.html">tips <sup> 3</sup></a></li>
        <li class="list-group-item tag-1"><a href="https://x-wei.github.io/tag/deep-learning.html">deep learning <sup> 28</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/c.html">C++ <sup> 1</sup></a></li>
        <li class="list-group-item tag-1"><a href="https://x-wei.github.io/tag/algorithm.html">algorithm <sup> 35</sup></a></li>
        <li class="list-group-item tag-4"><a href="https://x-wei.github.io/tag/texmacs.html">TeXmacs <sup> 1</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/ml.html">ml <sup> 4</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/eclipse.html">eclipse <sup> 4</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/ocaml.html">OCaml <sup> 8</sup></a></li>
        <li class="list-group-item tag-2"><a href="https://x-wei.github.io/tag/r.html">R <sup> 4</sup></a></li>
        <li class="list-group-item tag-3"><a href="https://x-wei.github.io/tag/chu-guo.html">出国 <sup> 2</sup></a></li>
        <li class="list-group-item tag-3"><a href="https://x-wei.github.io/tag/kuai-jie-jian.html">快捷键 <sup> 3</sup></a></li>
      </ul>
    </div>
  </div>
</div>
                    <!-- Series navigation: links to the previous and next article. The wrapper is a <div> because this item does not sit inside a <ul>/<ol>. -->
                    <div class="list-group-item"><h4><i class="fa fa-tags fa-list-ul" aria-hidden="true"></i><span class="icon-label">Series</span></h4>
                        <ul class="list-group">
                            <li class="list-group-item">
                                <h5>Previous article</h5>
                                <a href="https://x-wei.github.io/Ng_DLMooc_c1wk4.html">[Neural Networks and Deep Learning] week4. Deep Neural Network</a>
                            </li>
                            <li class="list-group-item">
                                <h5>Next article</h5>
                                <a href="https://x-wei.github.io/Ng_DLMooc_c2wk2.html">[Improving Deep Neural Networks] week2. Optimization algorithms</a>
                            </li>
                        </ul>
                    </div>
        

<!-- Sidebar widget: GitHub repositories. #gh_repos is the target the page's github.showRepos call points at; the paragraph inside is the initial placeholder. -->
<div class="sidebar-item hidden-xs">
  <div class="panel panel-default">
    <div class="panel-heading">
      <h4><i class="fa fa-github fa-lg"></i><span class="icon-label">GitHub仓库</span></h4>
    </div>
    <div class="panel-body">
      <div id="gh_repos">
        <p class="list-group-item">Status updating...</p>
      </div>
      <a href="https://github.com/x-wei">@x-wei</a> on GitHub
    </div>
  </div>
</div>

    </div>
</section>
        <div class="panel panel-default hidden-xs hidden-sm" id="affix-toc">
            <div class="panel-heading"><h4>
            目录</h4>
            </div>
            <div class="panel-body">
            <div id="toc"><ul><li><a class="toc-href" href="#setting-up-your-maching-learning-application" title="Setting up your Maching Learning Application">Setting up your Maching Learning Application</a><ul><li><a class="toc-href" href="#train-dev-test-sets" title="Train / Dev / Test sets">Train / Dev / Test sets</a></li><li><a class="toc-href" href="#bias-variance" title="Bias / Variance">Bias / Variance</a></li><li><a class="toc-href" href="#basic-recipe-for-machine-learning" title="Basic Recipe for Machine Learning">Basic Recipe for Machine Learning</a></li></ul></li><li><a class="toc-href" href="#regularizing-your-neural-network_1" title="Regularizing your neural network">Regularizing your neural network</a><ul><li><a class="toc-href" href="#regularization" title="Regularization">Regularization</a><ul><li><a class="toc-href" href="#example-logistic-regression" title="example: logistic regression">example: logistic regression</a></li><li><a class="toc-href" href="#example-nn" title="example: NN">example: NN</a></li></ul></li><li><a class="toc-href" href="#why-regularization-reduces-overfitting_1" title="Why regularization reduces overfitting?">Why regularization reduces overfitting?</a></li><li><a class="toc-href" href="#dropout-regularization" title="Dropout Regularization">Dropout Regularization</a><ul><li><a class="toc-href" href="#dropout-implementation-inverted-dropout" title='dropout implementation: "inverted dropout"'>dropout implementation: "inverted dropout"</a></li></ul></li><li><a class="toc-href" href="#understanding-dropout_1" title="Understanding Dropout">Understanding Dropout</a></li><li><a class="toc-href" href="#other-regularization-methods" title="Other regularization methods">Other regularization methods</a><ul><li><a class="toc-href" href="#data-augmentation" title="data augmentation">data augmentation</a></li><li><a class="toc-href" href="#early-stopping" title="early stopping">early stopping</a></li></ul></li></ul></li><li><a 
class="toc-href" href="#setting-up-your-optimization-problem_2" title="Setting up your optimization problem">Setting up your optimization problem</a><ul><li><a class="toc-href" href="#normalizing-inputs" title="Normalizing inputs">Normalizing inputs</a><ul><li><a class="toc-href" href="#vanishing-exploding-gradients" title="Vanishing / Exploding gradients">Vanishing / Exploding gradients</a></li></ul></li><li><a class="toc-href" href="#weight-initialization-for-deep-networks_1" title="Weight Initialization for Deep Networks">Weight Initialization for Deep Networks</a></li><li><a class="toc-href" href="#numerical-approximation-of-gradients" title="Numerical approximation of gradients">Numerical approximation of gradients</a></li><li><a class="toc-href" href="#gradient-checking" title="Gradient checking">Gradient checking</a></li><li><a class="toc-href" href="#gradient-checking-implementation-notes" title="Gradient Checking Implementation Notes">Gradient Checking Implementation Notes</a></li></ul></li></ul></div>
            </div>
        </div>
            </aside>
        </div>
    </div>
</div>


<!-- Page footer: friend links, copyright/license line, and a floating back-to-top button. -->
<footer id="fcfooter">
   <hr>

   <div class="container">
         links : 
                <a href="https://farseerfc.github.com/">farseerfc</a>
                <a href="https://hyhx2008.github.com/">H.Y.</a>
                <a href="https://reginald1787.github.io/">reginald1787</a>
                <a href="https://log.dofine.me/">dofine</a>

      <div class="row">
         <!-- Full-width column: the grid has 12 columns, so "col-md-14" matched no rule. -->
         <div class="col-md-12">
         <p><small>
            &copy; 2020 mx
            &middot; 通过
            <a href="https://docs.getpelican.com/" target="_blank" rel="noopener">Pelican</a> 生成
            <a rel="license" href="https://creativecommons.org/licenses/by-nc-sa/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-nc-sa/4.0/80x15.png"></a>
         </small></p>
         </div>
      </div>
   </div>
   <!-- Icon-only back-to-top control: aria-label provides its accessible name. -->
   <a href="#" class="btn btn-primary btn-fab btn-raised mdi-editor-vertical-align-top withripple" style="position:fixed;bottom:30px;right:30px;z-index:1000" aria-label="Back to top"></a>
</footer>
<script src="https://x-wei.github.io/theme/js/jquery.min.js"></script>

<!-- Include all compiled plugins (below), or include individual files as needed -->
<script src="https://x-wei.github.io/theme/js/bootstrap.min.js"></script>

<!-- Enable responsive features in IE8 with Respond.js (https://github.com/scottjehl/Respond) -->
<script src="https://x-wei.github.io/theme/js/respond.min.js"></script>

    <!-- GitHub JS -->
    <script type="text/javascript">
        // On DOM ready: inject jXHR.js if window.jXHR is missing, then ask
        // github.showRepos to fill #gh_repos. `github` is not defined in this
        // script; it is expected to come from theme/js/github.js.
        $(function () {
            var repoOptions = {
                user: 'x-wei',
                count: 5,
                skip_forks: false,
                target: '#gh_repos'
            };

            if (!window.jXHR) {
                // Insert the loader ahead of the first <script> in the document.
                var firstScript = document.getElementsByTagName('script')[0];
                var loader = document.createElement('script');
                loader.type = 'text/javascript';
                loader.src = 'https://x-wei.github.io/theme/js/jXHR.js';
                firstScript.parentNode.insertBefore(loader, firstScript);
            }

            github.showRepos(repoOptions);
        });
    </script>
    <script src="https://x-wei.github.io/theme/js/github.js" type="text/javascript"></script>
    <!-- End GitHub JS Code -->
    <!-- Disqus -->
    <script type="text/javascript">
        /* Disqus forum shortname; kept as a global `var` and used below to build the script host. */
        var disqus_shortname = 'xweisblog'; // required: replace example with your forum shortname

        /* Append the Disqus count.js script to <head> (or <body> if there is no <head>), loaded asynchronously. */
        (function () {
            var host = document.getElementsByTagName('HEAD')[0] || document.getElementsByTagName('BODY')[0];
            var counter = document.createElement('script');
            counter.async = true;
            counter.type = 'text/javascript';
            counter.src = '//' + disqus_shortname + '.disqus.com/count.js';
            host.appendChild(counter);
        }());
    </script>
    <!-- End Disqus Code -->
    <!-- Google Analytics -->
    <script type="text/javascript">

        // Command queue read by ga.js; reuse an existing global queue if one is already defined.
        var _gaq = _gaq || [];
        _gaq.push(['_setAccount', 'UA-30756331-1']);
        _gaq.push(['_trackPageview']);

        // Load ga.js asynchronously, picking the host prefix that matches the page protocol.
        (function () {
            var prefix;
            if ('https:' === document.location.protocol) {
                prefix = 'https://ssl';
            } else {
                prefix = 'http://www';
            }

            var tracker = document.createElement('script');
            tracker.type = 'text/javascript';
            tracker.async = true;
            tracker.src = prefix + '.google-analytics.com/ga.js';

            var anchor = document.getElementsByTagName('script')[0];
            anchor.parentNode.insertBefore(tracker, anchor);
        })();
    </script>
    <!-- End Google Analytics Code -->

<script src="https://x-wei.github.io/theme/js/ripples.min.js"></script>
<script src="https://x-wei.github.io/theme/js/material.min.js"></script>
<script src="https://x-wei.github.io/theme/js/jquery.bootstrap-autohidingnavbar.min.js"></script>
<script>
    // On DOM ready: start the material theme, the auto-hiding navbar, the image
    // lightbox and the affixed table of contents.
    $(function () {
        var $toc = $('#affix-toc');
        var $sidebar = $('#sidebar');

        $.material.init();
        $("div.navbar").autoHidingNavbar();

        // Clicking a responsive image copies its src into the modal image and opens the modal.
        $(".img-responsive")
            .css("cursor", "pointer")
            .on('click', function () {
                $('#mimg').attr('src', $(this).attr('src'));
                $('#myModal').modal('show');
            });

        $toc.affix({
            offset: {
                // Once the TOC carries the "affix" class its own height is no longer subtracted.
                top: function () {
                    var sidebarHeight = $sidebar.height();
                    return $toc.hasClass("affix")
                        ? sidebarHeight
                        : sidebarHeight - $toc.height();
                },
                bottom: function () {
                    var $article = $("#article-content");
                    return $("#fcfooter").offset().top -
                        $article.offset().top -
                        $article.height() + 20;
                }
            }
        });

        // Match the TOC width to the sidebar column.
        $toc.width($sidebar.width());
    });

    // Re-sync the TOC width whenever the window is resized.
    $(window).on('resize', function () {
        $('#affix-toc').width($('#sidebar').width());
    });
</script>

</body>
</html>