# replayMemory.py
import random

import numpy as np


class ReplayMemory(object):
    """Replay memory that stores the last `size` transitions."""

    def __init__(self, size=10000, vector_length=37,
                 agent_history_length=4, batch_size=32):
        """
        Args:
            size: Integer, number of stored transitions
            vector_length: Integer, length of the observation vector
            agent_history_length: Integer, number of observations stacked
                together to create a state
            batch_size: Integer, number of transitions returned in a minibatch
        """
        self.size = size
        self.vector_length = vector_length
        self.agent_history_length = agent_history_length
        self.batch_size = batch_size
        self.count = 0      # number of transitions stored so far
        self.current = 0    # position of the next write (ring buffer)
        # Pre-allocate memory
        self.actions = np.empty(self.size, dtype=np.int32)
        self.rewards = np.empty(self.size, dtype=np.float32)
        # float32 rather than uint8: the observations are real-valued
        # vectors, not 8-bit Atari pixels
        self.frames = np.empty((self.size, self.vector_length), dtype=np.float32)
        # np.bool was removed from NumPy; use the builtin bool instead
        self.terminal_flags = np.empty(self.size, dtype=bool)
        # Pre-allocate memory for the states and new_states in a minibatch
        self.states = np.empty((self.batch_size, self.vector_length),
                               dtype=np.float32)
        self.new_states = np.empty((self.batch_size, self.vector_length),
                                   dtype=np.float32)
        self.indices = np.empty(self.batch_size, dtype=np.int32)
    def add_experience(self, action, frame, reward, terminal):
        """
        Args:
            action: An integer between 0 and env.action_space.n - 1
                determining the action the agent performed
            frame: A vector of length vector_length observed from the environment
            reward: A float determining the reward the agent received for
                performing the action
            terminal: A bool stating whether the episode terminated
        """
        if frame.shape != (self.vector_length,):
            raise ValueError('Dimension of frame is wrong!')
        self.actions[self.current] = action
        self.frames[self.current, ...] = frame
        self.rewards[self.current] = reward
        self.terminal_flags[self.current] = terminal
        self.count = max(self.count, self.current + 1)
        # Overwrite the oldest transition once the buffer is full
        self.current = (self.current + 1) % self.size
    def _get_state(self, index):
        """Returns the agent_history_length observations ending at `index`."""
        if self.count == 0:
            raise ValueError("The replay memory is empty!")
        if index < self.agent_history_length - 1:
            raise ValueError("Index must be at least agent_history_length - 1")
        return self.frames[index - self.agent_history_length + 1:index + 1, ...]
    def _get_valid_indices(self):
        """Samples batch_size indices whose history windows neither straddle
        the current write position nor cross an episode boundary."""
        for i in range(self.batch_size):
            while True:
                # randint is inclusive on both ends, so index is always
                # at least agent_history_length
                index = random.randint(self.agent_history_length, self.count - 1)
                # Reject windows that wrap around the write pointer, where
                # old and new transitions would be mixed
                if index >= self.current and index - self.agent_history_length <= self.current:
                    continue
                # Reject windows that contain an episode end
                if self.terminal_flags[index - self.agent_history_length:index].any():
                    continue
                break
            self.indices[i] = index
    def get_minibatch(self):
        """
        Returns a minibatch of self.batch_size transitions as the tuple
        (states, actions, rewards, new_states, terminal_flags).
        """
        # count must exceed agent_history_length, otherwise the sampling
        # range in _get_valid_indices is empty
        if self.count <= self.agent_history_length:
            raise ValueError('Not enough memories to get a minibatch')
        self._get_valid_indices()
        for i, idx in enumerate(self.indices):
            # For flat vector observations the state is the observation
            # before the sampled transition and new_state the one after it
            self.states[i] = self.frames[idx - 1]
            self.new_states[i] = self.frames[idx]
        return (self.states, self.actions[self.indices],
                self.rewards[self.indices], self.new_states,
                self.terminal_flags[self.indices])
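
# Minimal usage sketch (an illustration, not part of the original module):
# it fills the buffer with random transitions in place of a real environment
# and draws one minibatch. The action count and episode length below are
# arbitrary assumptions chosen for the demo.
if __name__ == '__main__':
    memory = ReplayMemory(size=1000, vector_length=37,
                          agent_history_length=4, batch_size=32)
    for step in range(200):
        observation = np.random.rand(37).astype(np.float32)
        action = random.randint(0, 3)       # assume 4 discrete actions
        reward = random.random()
        terminal = (step % 50 == 49)        # assume episodes of 50 steps
        memory.add_experience(action, observation, reward, terminal)
    states, actions, rewards, new_states, terminals = memory.get_minibatch()
    print(states.shape, actions.shape, rewards.shape,
          new_states.shape, terminals.shape)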