Skip to content

Commit

Permalink
Solving several bugs in Sinergym (#174)
Browse files Browse the repository at this point in the history
* Renamed .vscode to .vscode_conf so that it does not affect the developer's current local VS Code workspace

* Solved bug #168

* Trying to solve issue #171

* Solved #173

* Fixed bug about duplicated progress.csv logs when DRL_battery.py script is used

* Added env.close() when DRL algorithm learning process doesn't close env automatically in DRL_battery.py (Added log message when simulation is closed in simulator Backend to detect this problem in the future easily).

* DRL_battery.py: The total number of timesteps is equal to the number of episodes multiplied by their size MINUS 1, in order to avoid a final reset that would add an empty episode.

* Re-structure for evaluation callback (Separation between evaluation callback and evaluation policy in Sinergym code)

* Deleted reset in EvalCallback in order to avoid empty episodes

* Migrating evaluation policy from callbacks

* Separated eval environment from train environment

* Fixed tests to match the new changes

* Added MANIFEST.in sinergym/data files
  • Loading branch information
AlejandroCN7 authored Mar 10, 2022
1 parent 9f3fd87 commit 9e6ea93
Show file tree
Hide file tree
Showing 12 changed files with 226 additions and 206 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/merge_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:
uses: actions/checkout@v2
- name: Build the latest Docker image
if: "${{ env.DOCKER_USER != '' && env.DOCKER_PASSWORD != '' }}"
run: docker build . --file Dockerfile --build-arg SINERGYM_EXTRAS=[extra] --tag $DOCKER_USER/sinergym:latest
run: docker build . --file Dockerfile --build-arg SINERGYM_EXTRAS=[extras] --tag $DOCKER_USER/sinergym:latest
- name: Login in Docker Hub account
if: "${{ env.DOCKER_USER != '' && env.DOCKER_PASSWORD != '' }}"
run: docker login -u $DOCKER_USER -p $DOCKER_PASSWORD
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ __pycache__/
.ipynb_checkpoints
*.egg-info

#vscode conf
.vscode

# Other softwares
EnergyPlus*/
bcvtb/
Expand Down
4 changes: 2 additions & 2 deletions .vscode/settings.json → .vscode_conf/settings.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"python.linting.pycodestyleEnabled": true,
"python.linting.pycodestyleEnabled": false,
"python.formatting.provider": "autopep8",
"python.formatting.autopep8Args": [
"--aggressive",
"--aggressive"
],
"editor.formatOnSave": true,
"window.zoomLevel": 2,
"window.zoomLevel": 0,
"python.testing.pytestArgs": [
"tests"
],
Expand Down
89 changes: 58 additions & 31 deletions DRL_battery.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
from sinergym.utils.wrappers import (LoggerWrapper, MultiObsWrapper,
NormalizeObservation)

#--------------------------------BATTERY ARGUMENTS DEFINITION---------------------------------#
# ---------------------------------------------------------------------------- #
# Parameters definition #
# ---------------------------------------------------------------------------- #
parser = argparse.ArgumentParser()
# commons arguments for battery
parser.add_argument(
Expand Down Expand Up @@ -149,15 +151,20 @@
parser.add_argument('--sigma', '-sig', type=float, default=0.1)

args = parser.parse_args()
#---------------------------------------------------------------------------------------------#
# register run name
#------------------------------------------------------------------------------#

# ---------------------------------------------------------------------------- #
# Register run name #
# ---------------------------------------------------------------------------- #
experiment_date = datetime.today().strftime('%Y-%m-%d %H:%M')
name = args.algorithm + '-' + args.environment + \
'-episodes_' + str(args.episodes)
if args.seed:
name += '-seed_' + str(args.seed)
name += '(' + experiment_date + ')'
# Check if MLFLOW_TRACKING_URI is defined
# ---------------------------------------------------------------------------- #
# Check if MLFLOW_TRACKING_URI is defined #
# ---------------------------------------------------------------------------- #
mlflow_tracking_uri = os.environ.get('MLFLOW_TRACKING_URI')
if mlflow_tracking_uri is not None:
# Check ping to server
Expand Down Expand Up @@ -199,25 +206,25 @@
mlflow.log_param('tau', args.tau)
mlflow.log_param('sigma', args.sigma)

# Environment construction (with reward specified)
# ---------------------------------------------------------------------------- #
# Environment construction (with reward specified) #
# ---------------------------------------------------------------------------- #
if args.reward == 'linear':
env = gym.make(args.environment, reward=LinearReward())
reward = LinearReward()
elif args.reward == 'exponential':
env = gym.make(args.environment, reward=ExpReward())
reward = ExpReward()
else:
raise RuntimeError('Reward function specified is not registered.')

# env wrappers (optionals)
if args.normalization:
env = NormalizeObservation(env)
if args.logger:
env = LoggerWrapper(env)
if args.multiobs:
env = MultiObsWrapper(env)

######################## TRAINING ########################
env = gym.make(args.environment, reward=reward)
# env for evaluation if is enabled
eval_env = None
if args.evaluation:
eval_env = gym.make(args.environment, reward=reward)

# env wrappers (optionals)
# ---------------------------------------------------------------------------- #
# Wrappers #
# ---------------------------------------------------------------------------- #
if args.normalization:
# We have to know what dictionary ranges to use
norm_range = None
Expand All @@ -231,11 +238,19 @@
else:
raise NameError('env_type is not valid, check environment name')
env = NormalizeObservation(env, ranges=norm_range)
if eval_env is not None:
eval_env = NormalizeObservation(eval_env, ranges=norm_range)
if args.logger:
env = LoggerWrapper(env)
if eval_env is not None:
eval_env = LoggerWrapper(eval_env)
if args.multiobs:
env = MultiObsWrapper(env)
# Defining model(algorithm)
if eval_env is not None:
eval_env = MultiObsWrapper(eval_env)
# ---------------------------------------------------------------------------- #
# Defining model(algorithm) #
# ---------------------------------------------------------------------------- #
model = None
#--------------------------DQN---------------------------#
if args.algorithm == 'DQN':
Expand Down Expand Up @@ -345,21 +360,22 @@
raise RuntimeError('Algorithm specified is not registered.')
#--------------------------------------------------------#

# Calculating n_timesteps_episode for training
# ---------------------------------------------------------------------------- #
# Calculating total training timesteps based on number of episodes #
# ---------------------------------------------------------------------------- #
n_timesteps_episode = env.simulator._eplus_one_epi_len / \
env.simulator._eplus_run_stepsize
timesteps = args.episodes * n_timesteps_episode
timesteps = args.episodes * n_timesteps_episode - 1

# For callbacks processing
env_vec = DummyVecEnv([lambda: env])

# Using Callbacks for training
# ---------------------------------------------------------------------------- #
# CALLBACKS #
# ---------------------------------------------------------------------------- #
callbacks = []

# Set up Evaluation and saving best model
if args.evaluation:
eval_callback = LoggerEvalCallback(
env_vec,
eval_env,
best_model_save_path='best_model/' + name + '/',
log_path='best_model/' + name + '/',
eval_freq=n_timesteps_episode *
Expand All @@ -380,14 +396,23 @@

callback = CallbackList(callbacks)

# Training
# ---------------------------------------------------------------------------- #
# TRAINING #
# ---------------------------------------------------------------------------- #
model.learn(
total_timesteps=timesteps,
callback=callback,
log_interval=args.log_interval)
model.save(env.simulator._env_working_dir_parent + '/' + name)

# If mlflow artifacts store is active
# If the algorithm doesn't reset or close the environment, this script does so
# in order to log all simulation data correctly (Energyplus + Sinergym logs)
if env.simulator._episode_existed:
env.close()

# ---------------------------------------------------------------------------- #
# Mlflow artifacts storage #
# ---------------------------------------------------------------------------- #
if args.mlflow_store:
# Code for send output and tensorboard to mlflow artifacts.
mlflow.log_artifacts(
Expand All @@ -403,8 +428,9 @@
local_dir=args.tensorboard + '/' + name + '/',
artifact_path=os.path.abspath(args.tensorboard).split('/')[-1] + '/' + name + '/')

# Store all results if remote_store flag is True (Google Cloud Bucket for
# experiments)
# ---------------------------------------------------------------------------- #
# Google Cloud Bucket Storage #
# ---------------------------------------------------------------------------- #
if args.remote_store:
# Initiate Google Cloud client
client = gcloud.init_storage_client()
Expand Down Expand Up @@ -436,8 +462,9 @@
# End mlflow run
mlflow.end_run()

# If it is a Google Cloud VM and experiment flag auto_delete has been
# activated, shutdown remote machine when ends
# ---------------------------------------------------------------------------- #
# Autodelete option if is a cloud resource #
# ---------------------------------------------------------------------------- #
if args.group_name and args.auto_delete:
token = gcloud.get_service_account_token()
gcloud.delete_instance_MIG_from_container(args.group_name, token)
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
include requirements.txt
include sinergym/version.txt
include sinergym/version.txt
recursive-include sinergym/data *
2 changes: 1 addition & 1 deletion sinergym/data/buildings/5ZoneAutoDXVAV.idf
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@
1, !- Begin Month
1, !- Begin Day of Month
, !- Begin Year
3, !- End Month
12, !- End Month
31, !- End Day of Month
, !- End Year
Monday, !- Day of Week for Start Day
Expand Down
33 changes: 17 additions & 16 deletions sinergym/envs/eplus_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def __init__(

# Reward class
self.cls_reward = reward
self.obs_dict = None

def step(self,
action: Union[int,
Expand Down Expand Up @@ -166,19 +167,19 @@ def step(self,
self.simulator.logger_main.debug(action_)
time_info, obs, done = self.simulator.step(action_)
# Create dictionary with observation
obs_dict = dict(zip(self.variables['observation'], obs))
self.obs_dict = dict(zip(self.variables['observation'], obs))
# Add current timestep information
obs_dict['day'] = time_info[0]
obs_dict['month'] = time_info[1]
obs_dict['hour'] = time_info[2]
self.obs_dict['day'] = time_info[0]
self.obs_dict['month'] = time_info[1]
self.obs_dict['hour'] = time_info[2]

# Calculate reward

# Calculate temperature mean for all building zones
temp_values = [value for key, value in obs_dict.items(
temp_values = [value for key, value in self.obs_dict.items(
) if key.startswith('Zone Air Temperature')]

power = obs_dict['Facility Total HVAC Electricity Demand Rate (Whole Building)']
power = self.obs_dict['Facility Total HVAC Electricity Demand Rate (Whole Building)']
reward, terms = self.cls_reward.calculate(
power, temp_values, time_info[1], time_info[0])

Expand All @@ -187,17 +188,17 @@ def step(self,
'timestep': int(
time_info[3] / self.simulator._eplus_run_stepsize),
'time_elapsed': int(time_info[3]),
'day': obs_dict['day'],
'month': obs_dict['month'],
'hour': obs_dict['hour'],
'day': self.obs_dict['day'],
'month': self.obs_dict['month'],
'hour': self.obs_dict['hour'],
'total_power': power,
'total_power_no_units': terms['reward_energy'],
'comfort_penalty': terms['reward_comfort'],
'temperatures': temp_values,
'out_temperature': obs_dict['Site Outdoor Air Drybulb Temperature (Environment)'],
'out_temperature': self.obs_dict['Site Outdoor Air Drybulb Temperature (Environment)'],
'action_': action_}

return np.array(list(obs_dict.values()),
return np.array(list(self.obs_dict.values()),
dtype=np.float32), reward, done, info

def reset(self) -> np.ndarray:
Expand All @@ -208,13 +209,13 @@ def reset(self) -> np.ndarray:
"""
# Change to next episode
time_info, obs, done = self.simulator.reset(self.weather_variability)
obs_dict = dict(zip(self.variables['observation'], obs))
self.obs_dict = dict(zip(self.variables['observation'], obs))

obs_dict['day'] = time_info[0]
obs_dict['month'] = time_info[1]
obs_dict['hour'] = time_info[2]
self.obs_dict['day'] = time_info[0]
self.obs_dict['month'] = time_info[1]
self.obs_dict['hour'] = time_info[2]

return np.array(list(obs_dict.values()), dtype=np.float32)
return np.array(list(self.obs_dict.values()), dtype=np.float32)

def render(self, mode: str = 'human') -> None:
"""Environment rendering.
Expand Down
9 changes: 6 additions & 3 deletions sinergym/simulators/eplus.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from sinergym.utils.config import Config

LOG_LEVEL_MAIN = 'INFO'
LOG_LEVEL_EPLS = 'ERROR'
LOG_LEVEL_EPLS = 'FATAL'
LOG_FMT = "[%(asctime)s] %(name)s %(levelname)s:%(message)s"


Expand Down Expand Up @@ -150,11 +150,12 @@ def reset(
# End the last episode if exists
if self._episode_existed:
self._end_episode()
self.logger_main.info('Last EnergyPlus process has been closed. ')
self.logger_main.info(
'EnergyPlus episode completed successfully. ')
self._epi_num += 1

# Create EnergyPlus simulation process
self.logger_main.info('Creating EnergyPlus simulation environment...')
self.logger_main.info('Creating new EnergyPlus simulation episode...')
# Creating episode working dir
eplus_working_dir = self._config.set_episode_working_dir()
# Getting IDF, WEATHER, VARIABLES and OUTPUT path for current episode
Expand Down Expand Up @@ -401,6 +402,8 @@ def end_env(self) -> None:
self._end_episode()
# self._socket.shutdown(socket.SHUT_RDWR);
self._socket.close()
self.logger_main.info(
'EnergyPlus simulation closed successfully. ')

def end_episode(self) -> None:
"""It ends current simulator episode."""
Expand Down
Loading

0 comments on commit 9e6ea93

Please sign in to comment.