main_step.py (forked from federicovergallo/SUMO-changing-lane-agent)
from configparser import ConfigParser
from argparse import ArgumentParser
import torch
import gym
import numpy as np
import os
import gym_sumo
from agents.ppo import PPO
from agents.sac import SAC
from agents.ddpg import DDPG
from utils.utils import make_transition, Dict, RunningMeanStd
os.makedirs('./model_weights', exist_ok=True)
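# argparse's type=bool treats any non-empty string (including "False") as True,
# so the boolean flags below use this small helper instead. The helper is an
# addition to the original script, not part of the upstream code.
def str2bool(value):
    """Parse a command-line string such as 'True'/'false'/'1' into a bool."""
    return str(value).lower() in ('true', '1', 'yes')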
parser = ArgumentParser('parameters')
parser.add_argument("--env_name", type=str, default='gym_sumo-v1')
parser.add_argument("--algo", type=str, default='ppo', help='algorithm to use (default: ppo)')
parser.add_argument('--train', type=str2bool, default=True, help="(default: True)")
parser.add_argument('--render', type=str2bool, default=False, help="(default: False)")
parser.add_argument('--epochs', type=int, default=1, help='number of epochs (default: 1)')
parser.add_argument('--tensorboard', type=str2bool, default=False, help='use tensorboard (default: False)')
parser.add_argument("--load", type=str, default='no', help='network weights to load from ./model_weights')
parser.add_argument("--save_interval", type=int, default=100, help='save interval (default: 100)')
parser.add_argument("--print_interval", type=int, default=1, help='print interval (default: 1)')
parser.add_argument("--use_cuda", type=str2bool, default=True, help='cuda usage (default: True)')
parser.add_argument("--reward_scaling", type=float, default=0.1, help='reward scaling (default: 0.1)')
args = parser.parse_args()
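# Example invocation (assuming SUMO and the gym_sumo package are installed):
#   python main_step.py --algo ppo --epochs 1000 --render True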
config = ConfigParser()
config.read('config.ini')
agent_args = Dict(config, args.algo)  # hyperparameters for the chosen algorithm
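# config.ini is expected to hold one section per algorithm. The exact keys are
# defined by utils.Dict, but based on the attributes read below (on_policy,
# traj_length), a ppo section presumably looks something like:
#   [ppo]
#   on_policy = True
#   traj_length = 2048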
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if not args.use_cuda:
    device = 'cpu'

if args.tensorboard:
    from torch.utils.tensorboard import SummaryWriter
    writer = SummaryWriter()
else:
    writer = None
env = gym.make(args.env_name)
action_dim = 2   # hard-coded to match the gym_sumo-v1 action space
state_dim = 37   # hard-coded to match the gym_sumo-v1 observation space
state_rms = RunningMeanStd(state_dim)
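# state_rms keeps a running mean/variance of observations; the commented-out
# line in the training loop below shows how it would be applied to normalize
# and clip raw states.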
score_lst = []
state_lst = []
avg_scores = []
if agent_args.on_policy:
    score = 0.0
    # state = np.clip((state_ - state_rms.mean) / (state_rms.var ** 0.5 + 1e-8), -5, 5)
    for n_epi in range(args.epochs):
        state = env.reset(gui=args.render, numVehicles=25)
        for t in range(agent_args.traj_length):
            # Step the custom gym_sumo env; these keyword flags appear to hand
            # control to SUMO-side models (Gipps car following, SECRM lane
            # changing) rather than the learned policy for this step.
            next_state_, reward_info, done, info = env.step(
                [0, 1], sumo_lc=True, sumo_carfollow=True,
                stop_and_go=True, car_follow='Gipps', lane_change='SECRM')
            if done:
                print('RL vehicle ran out of the network!')
                break