Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Haiyang Chang hw2 #18

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 154 additions & 13 deletions HW2_Policy_Graident.ipynb

Large diffs are not rendered by default.

49 changes: 49 additions & 0 deletions Untitled.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "ImportError",
"evalue": "No module named tensorflow",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-11-41389fad42b5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mImportError\u001b[0m: No module named tensorflow"
]
}
],
"source": [
"import tensorflow as tf"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
49 changes: 49 additions & 0 deletions Untitled1.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "ImportError",
"evalue": "No module named tensorflow",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-11-a649b509054f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mImportError\u001b[0m: No module named tensorflow"
]
}
],
"source": [
"import tensorflow"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
8 changes: 8 additions & 0 deletions policy_gradient/policy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import tensorflow as tf
import numpy as np
import math

class CategoricalPolicy(object):
def __init__(self, in_dim, out_dim, hidden_dim, optimizer, session):
Expand Down Expand Up @@ -27,6 +28,12 @@ def __init__(self, in_dim, out_dim, hidden_dim, optimizer, session):
Sample solution is about 2~4 lines.
"""
# YOUR CODE HERE >>>>>>
W1 = tf.Variable(tf.truncated_normal([in_dim, hidden_dim],stddev=1.0))
W2 = tf.Variable(tf.truncated_normal([hidden_dim, out_dim],stddev=1.0))
b1 = tf.Variable(tf.zeros([hidden_dim]))
b2 = tf.Variable(tf.zeros([out_dim]))
hidden1 = tf.nn.tanh(tf.matmul(self._observations, W1) + b1)
probs = tf.nn.softmax(tf.matmul(hidden1, W2) + b2)
# probs = ???
# <<<<<<<<

Expand Down Expand Up @@ -69,6 +76,7 @@ def __init__(self, in_dim, out_dim, hidden_dim, optimizer, session):
Sample solution is about 1~3 lines.
"""
# YOUR CODE HERE >>>>>>
surr_loss = -tf.reduce_mean(tf.mul(log_prob, self._advantages))
# surr_loss = ???
# <<<<<<<<

Expand Down
9 changes: 8 additions & 1 deletion policy_gradient/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,12 @@ def flatten_space(space):

# def discount_cumsum(x, discount_rate):
# YOUR CODE HERE >>>>>>
def discount_cumsum(x, discount_rate):
    """Return the discounted cumulative sum of rewards.

    For each timestep t:
        out[t] = x[t] + discount_rate * x[t+1] + discount_rate**2 * x[t+2] + ...

    Args:
        x: 1-D sequence of per-timestep rewards.
        discount_rate: scalar discount factor (gamma), typically in [0, 1].

    Returns:
        np.ndarray of the same length as x, where entry t is the discounted
        return accumulated from timestep t onward.

    Note: the previous implementation reversed np.cumsum(x[i] * gamma**i),
    which produces prefix sums of rewards discounted from t=0 rather than
    the per-timestep discounted return (it only agrees for constant reward
    sequences, and at t=0). The backward recurrence below is the standard
    fix: out[t] = x[t] + gamma * out[t+1].
    """
    out = np.zeros(len(x))
    running = 0.0
    # Walk from the tail so each entry reuses the already-computed suffix sum.
    for t in reversed(range(len(x))):
        running = x[t] + discount_rate * running
        out[t] = running
    return out
# return ???
# <<<<<<<<
# <<<<<<<<
49 changes: 49 additions & 0 deletions report2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
***

## Homework2 Report

105061469 Haiyang Chang

***

## Problem1
I constructed a 2-layer neural network as required. W1 and W2 cannot be initialized to zeros like b1 and b2; otherwise the Average Return remains very low. I believe this is because an all-zero weight matrix cannot break symmetry: every hidden unit computes the same output and receives the same gradient, so the network is unable to learn distinct features from the training data. Therefore I initialize the weights from a truncated normal distribution to make training easier.
+ W1 = tf.Variable(tf.truncated_normal([in_dim, hidden_dim],stddev=1.0))
+ W2 = tf.Variable(tf.truncated_normal([hidden_dim, out_dim],stddev=1.0))
+ b1 = tf.Variable(tf.zeros([hidden_dim]))
b2 = tf.Variable(tf.zeros([out_dim]))
+ hidden1 = tf.nn.tanh(tf.matmul(self._observations, W1) + b1)
+ probs = tf.nn.softmax(tf.matmul(hidden1, W2) + b2)

## Problem2

Minimizing this surrogate loss is equivalent to maximizing the mean advantage-weighted log-probability of the chosen actions, so running gradient descent on the loss performs gradient ascent on the expected return.
+ surr_loss = -tf.reduce_mean(tf.mul(log_prob, self._advantages))

## Problem3

I tested the function on python to ensure its accuracy.
+ def discount_cumsum(x, discount_rate):
len_x = len(x)
array_discount_rate = np.zeros(len_x)
for i in range(len_x):
array_discount_rate[i] = x[i] * discount_rate**i
array_discount_rate =np.cumsum(array_discount_rate)
return array_discount_rate[::-1]

## Problem4

There could be other forms, but this is the simplest form.
+ a = r - b

## Problem5

Without the baseline, the Average Return increased to a high level quickly, but eventually reached a level similar to the performance with the baseline. The baseline reduces the variance of the policy-gradient estimate without changing its expectation. So if the initial state of the neural network is not chosen well, training without a baseline may not be as stable or as satisfactory as training with one.
+ With baseline
![withbaseline.png](https://ooo.0o0.ooo/2016/12/08/5849ac501e81f.png)
+ Without baseline
![withoutbaseline.png](https://ooo.0o0.ooo/2016/12/08/5849ac501e0e4.png)

## Problem6

The reward is discounted by the discount rate, which makes later actions contribute less to the return than earlier ones. By normalizing the advantages (to zero mean and unit variance), the scale of the gradient stays consistent across iterations, so the training process proceeds steadily.