-
Notifications
You must be signed in to change notification settings - Fork 0
/
Trainer.py
84 lines (72 loc) · 3.11 KB
/
Trainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import pandas as pd
import numpy as np
import keras
from AZK import X_train
from Helper import formatPrice, getState, plot_behavior
from agent import Agent
# --- Hyperparameters -------------------------------------------------------
window_size = 1
batch_size = 32
# An episode represents one complete pass over the data.
episode_count = int(10)

# --- Agent and input series ------------------------------------------------
agent = Agent(window_size)
# X_train (imported from AZK) is the series the agent is trained on; the
# original note describes it as the closing value of the stock price.
data = X_train
# Number of time steps walked per episode (one less than the series length).
l = len(data) - 1
# Structured layout of one replay-memory entry; used only for the CSV export.
_TRANSITION_DTYPE = [
    ('state', 'O'),       # state array, kept as an object
    ('action', 'i4'),     # action id returned by agent.act
    ('reward', 'f8'),     # float, so fractional price differences are not truncated
    ('next_state', 'O'),  # next-state array, kept as an object
    ('done', '?'),        # end-of-episode flag
]


def _flatten_memory(memory):
    """Return the replay memory as a 2-D array, one flattened transition per row.

    Each entry of *memory* is a ``(state, action, reward, next_state, done)``
    tuple as appended by the training loop. The row layout is: flattened
    state, action, reward, flattened next_state, done.

    Assumes ``state`` / ``next_state`` are NumPy arrays (``.flatten()`` is
    called on them) and that every transition flattens to the same length
    -- TODO confirm against ``getState``.
    """
    transitions = np.array(list(memory), dtype=_TRANSITION_DTYPE)
    rows = [
        np.concatenate([
            tr['state'].flatten(),
            [tr['action']],
            [tr['reward']],
            tr['next_state'].flatten(),
            [tr['done']],
        ])
        for tr in transitions
    ]
    return np.array(rows)


# Main training loop: runs episodes 0..episode_count inclusive. Each episode
# walks the series once, records every transition in agent.memory, trains via
# experience replay, and saves a model checkpoint at the end.
for e in range(episode_count + 1):
    print(f"Running episode {e}/{episode_count}")
    state = getState(data, 0, window_size + 1)
    total_profit = 0
    agent.inventory = []
    states_sell = []
    states_buy = []

    for t in range(l):
        action = agent.act(state)
        next_state = getState(data, t + 1, window_size + 1)
        # Reward stays 0 for "sit" (any other action) and for a sell attempt
        # with an empty inventory.
        reward = 0

        if action == 1:  # buy
            agent.inventory.append(data[t])
            states_buy.append(t)
            print("Buy: " + formatPrice(data[t]))
        elif action == 2 and len(agent.inventory) > 0:  # sell
            # Sell the oldest held position first (FIFO).
            bought_price = agent.inventory.pop(0)
            profit = data[t] - bought_price
            # Losses are clipped to 0 for the reward but still count
            # against the running profit.
            reward = max(profit, 0)
            total_profit += profit
            states_sell.append(t)
            print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(profit))

        done = t == l - 1
        # Store the transition in memory; agent.expReplay samples from it.
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")
            print(agent.memory)
            # Chart of buy/sell points against the series for this episode.
            plot_behavior(data, states_buy, states_sell, total_profit)
            # Dump the replay memory as one flat row per transition.
            pd.DataFrame(_flatten_memory(agent.memory)).to_csv("Agent" + str(e) + ".csv")

        # Train once enough transitions have been collected for a batch.
        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)

    # Save a checkpoint after every episode.
    agent.model.save("model_ep" + str(e) + ".keras")