-
Notifications
You must be signed in to change notification settings - Fork 6
/
config_cartpole.yaml
59 lines (54 loc) · 1.42 KB
/
config_cartpole.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Experiment configuration for the `CartPole-v0` gym task.
# Booleans and floats use canonical spellings (`true`/`false`, `0.9`); the
# parsed values and types are the same as before.

# Task, device and run-mode switches.
game_name: 'CartPole-v0'
device: 'cpu'  # make sure to use `cuda` on GPU servers
is_learning: true
is_testing: true
# NOTE(review): `test` sits alongside `is_testing`; how the two differ is not
# documented in this file - confirm against the code that reads them.
test: false

# Domain, file locations, dataset and seed.
folder_location: './baseline/'
domain: 'gym'
folder_name: 'cartpole_env'
network_path: 'weights_new.pt'
batch: false
# NOTE(review): this path contains `10000` while `dataset_size` below is 1000;
# confirm the two are meant to differ.
dataset_path: 'dataset/10000/123/1_0/counts_dataset.pkl'
baseline_path: 'weights.pt'
seed: 123
dataset_size: 1000

# Learning algorithm selection and its parameters.
# learning_type can be regular, pi_b (i.e. SPIBB-DQN), soft_sort (i.e.
# Soft-SPIBB-DQN) or ramdp.
learning_type: 'pi_b'
# minimum_count corresponds to n_wedge in the paper. Set to 0 for vanilla DQN.
minimum_count: 10.0
# epsilon_soft is the epsilon value used for soft spibb.
epsilon_soft: 1.0
# kappa corresponds to the kappa parameter in ramdp. Set to 0 for vanilla DQN.
kappa: 0.003
baseline_temp: 0.2
count_param: 0.2

# Experiment schedule and episode settings.
num_experiments: 1
num_epochs: 30
passes_on_dataset: 2000
steps_per_test: 20000
episode_max_len: 200
max_start_nullops: 0
steps_per_epoch: 3000
extra_stochasticity: 0.0

# Epsilon schedule.
epsilon: 1
annealing: true
final_epsilon: 0.01
test_epsilon: 0
annealing_start: 0
annealing_steps: 100000

# Network and optimisation settings.
ddqn: true
# The original note on network_size lists `large` (NIPS paper model) and
# `nature` (Nature paper model) as values.
# NOTE(review): neither matches the `dense` / `small_dense` values used here;
# confirm which names the loader accepts.
network_size: 'dense'
# network_size: 'small_dense'
gamma: 0.9
learning_rate: 0.00025
minibatch_size: 32
update_freq: 1000
state_shape: [4]
nb_actions: 2
history_len: 1
replay_max_size: 1000000
replay_min_size: 1
learning_frequency: 1

# Dimensions and normalisation.
action_dim: 1
reward_dim: 1
normalize: 1.0