Solving several bugs in Sinergym (#174)

* Renamed .vscode to .vscode_conf so that it does not affect the developer's local VS Code workspace settings * Solved bug #168 * Trying to solve issue #171 * Solved #173 * Fixed bug that duplicated progress.csv logs when the DRL_battery.py script is used * Added env.close() in DRL_battery.py for when the DRL algorithm's learning process doesn't close the env automatically (also added a log message when the simulation is closed in the simulator backend, to detect this problem easily in the future). * DRL_battery.py: The total number of timesteps is now the number of episodes multiplied by their size MINUS 1, in order to avoid a last reset that would add an empty episode. * Restructured the evaluation callback (separation between evaluation callback and evaluation policy in Sinergym code) * Deleted reset in EvalCallback in order to avoid empty episodes * Migrated evaluation policy out of callbacks * Separated evaluation environment from training environment * Fixed tests to match the new changes * Added sinergym/data files to MANIFEST.in
ugr-sail · Mar 10, 2022 · 9e6ea93 · 9e6ea93
1 parent 9f3fd87
commit 9e6ea93
Show file tree

Hide file tree

Showing 12 changed files with 226 additions and 206 deletions.
diff --git a/.github/workflows/merge_pr.yml b/.github/workflows/merge_pr.yml
@@ -100,7 +100,7 @@ jobs:
         uses: actions/checkout@v2
       - name: Build the latest Docker image
         if: "${{ env.DOCKER_USER != '' && env.DOCKER_PASSWORD != '' }}"
-        run: docker build . --file Dockerfile --build-arg SINERGYM_EXTRAS=[extra] --tag $DOCKER_USER/sinergym:latest
+        run: docker build . --file Dockerfile --build-arg SINERGYM_EXTRAS=[extras] --tag $DOCKER_USER/sinergym:latest
       - name: Login in Docker Hub account
         if: "${{ env.DOCKER_USER != '' && env.DOCKER_PASSWORD != '' }}"
         run: docker login -u $DOCKER_USER -p $DOCKER_PASSWORD

diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,9 @@ __pycache__/
 .ipynb_checkpoints
 *.egg-info
 
+#vscode conf
+.vscode
+
 # Other softwares
 EnergyPlus*/
 bcvtb/

diff --git a/.vscode/settings.json → .vscode_conf/settings.json b/.vscode/settings.json → .vscode_conf/settings.json
@@ -1,12 +1,12 @@
 {
-    "python.linting.pycodestyleEnabled": true,
+    "python.linting.pycodestyleEnabled": false,
     "python.formatting.provider": "autopep8",
     "python.formatting.autopep8Args": [
         "--aggressive",
         "--aggressive"
     ],
     "editor.formatOnSave": true,
-    "window.zoomLevel": 2,
+    "window.zoomLevel": 0,
     "python.testing.pytestArgs": [
         "tests"
     ],

diff --git a/DRL_battery.py b/DRL_battery.py
@@ -19,7 +19,9 @@
 from sinergym.utils.wrappers import (LoggerWrapper, MultiObsWrapper,
                                      NormalizeObservation)
 
-#--------------------------------BATTERY ARGUMENTS DEFINITION---------------------------------#
+# ---------------------------------------------------------------------------- #
+#                             Parameters definition                            #
+# ---------------------------------------------------------------------------- #
 parser = argparse.ArgumentParser()
 # commons arguments for battery
 parser.add_argument(
@@ -149,15 +151,20 @@
 parser.add_argument('--sigma', '-sig', type=float, default=0.1)
 
 args = parser.parse_args()
-#---------------------------------------------------------------------------------------------#
-# register run name
+#------------------------------------------------------------------------------#
+
+# ---------------------------------------------------------------------------- #
+#                               Register run name                              #
+# ---------------------------------------------------------------------------- #
 experiment_date = datetime.today().strftime('%Y-%m-%d %H:%M')
 name = args.algorithm + '-' + args.environment + \
     '-episodes_' + str(args.episodes)
 if args.seed:
     name += '-seed_' + str(args.seed)
 name += '(' + experiment_date + ')'
-# Check if MLFLOW_TRACKING_URI is defined
+# ---------------------------------------------------------------------------- #
+#                    Check if MLFLOW_TRACKING_URI is defined                   #
+# ---------------------------------------------------------------------------- #
 mlflow_tracking_uri = os.environ.get('MLFLOW_TRACKING_URI')
 if mlflow_tracking_uri is not None:
     # Check ping to server
@@ -199,25 +206,25 @@
     mlflow.log_param('tau', args.tau)
     mlflow.log_param('sigma', args.sigma)
 
-    # Environment construction (with reward specified)
+    # ---------------------------------------------------------------------------- #
+    #               Environment construction (with reward specified)               #
+    # ---------------------------------------------------------------------------- #
     if args.reward == 'linear':
-        env = gym.make(args.environment, reward=LinearReward())
+        reward = LinearReward()
     elif args.reward == 'exponential':
-        env = gym.make(args.environment, reward=ExpReward())
+        reward = ExpReward()
     else:
         raise RuntimeError('Reward function specified is not registered.')
 
-    # env wrappers (optionals)
-    if args.normalization:
-        env = NormalizeObservation(env)
-    if args.logger:
-        env = LoggerWrapper(env)
-    if args.multiobs:
-        env = MultiObsWrapper(env)
-
-    ######################## TRAINING ########################
+    env = gym.make(args.environment, reward=reward)
+    # env for evaluation, if it is enabled
+    eval_env = None
+    if args.evaluation:
+        eval_env = gym.make(args.environment, reward=reward)
 
-    # env wrappers (optionals)
+    # ---------------------------------------------------------------------------- #
+    #                                   Wrappers                                   #
+    # ---------------------------------------------------------------------------- #
     if args.normalization:
         # We have to know what dictionary ranges to use
         norm_range = None
@@ -231,11 +238,19 @@
         else:
             raise NameError('env_type is not valid, check environment name')
         env = NormalizeObservation(env, ranges=norm_range)
+        if eval_env is not None:
+            eval_env = NormalizeObservation(eval_env, ranges=norm_range)
     if args.logger:
         env = LoggerWrapper(env)
+        if eval_env is not None:
+            eval_env = LoggerWrapper(eval_env)
     if args.multiobs:
         env = MultiObsWrapper(env)
-    # Defining model(algorithm)
+        if eval_env is not None:
+            eval_env = MultiObsWrapper(eval_env)
+    # ---------------------------------------------------------------------------- #
+    #                           Defining model(algorithm)                          #
+    # ---------------------------------------------------------------------------- #
     model = None
     #--------------------------DQN---------------------------#
     if args.algorithm == 'DQN':
@@ -345,21 +360,22 @@
         raise RuntimeError('Algorithm specified is not registered.')
     #--------------------------------------------------------#
 
-    # Calculating n_timesteps_episode for training
+    # ---------------------------------------------------------------------------- #
+    #       Calculating total training timesteps based on number of episodes       #
+    # ---------------------------------------------------------------------------- #
     n_timesteps_episode = env.simulator._eplus_one_epi_len / \
         env.simulator._eplus_run_stepsize
-    timesteps = args.episodes * n_timesteps_episode
+    timesteps = args.episodes * n_timesteps_episode - 1
 
-    # For callbacks processing
-    env_vec = DummyVecEnv([lambda: env])
-
-    # Using Callbacks for training
+    # ---------------------------------------------------------------------------- #
+    #                                   CALLBACKS                                  #
+    # ---------------------------------------------------------------------------- #
     callbacks = []
 
     # Set up Evaluation and saving best model
     if args.evaluation:
         eval_callback = LoggerEvalCallback(
-            env_vec,
+            eval_env,
             best_model_save_path='best_model/' + name + '/',
             log_path='best_model/' + name + '/',
             eval_freq=n_timesteps_episode *
@@ -380,14 +396,23 @@
 
     callback = CallbackList(callbacks)
 
-    # Training
+    # ---------------------------------------------------------------------------- #
+    #                                   TRAINING                                   #
+    # ---------------------------------------------------------------------------- #
     model.learn(
         total_timesteps=timesteps,
         callback=callback,
         log_interval=args.log_interval)
     model.save(env.simulator._env_working_dir_parent + '/' + name)
 
-    # If mlflow artifacts store is active
+    # If the algorithm doesn't reset or close the environment, this script does
+    # it, to correctly log all simulation data (EnergyPlus + Sinergym logs)
+    if env.simulator._episode_existed:
+        env.close()
+
+    # ---------------------------------------------------------------------------- #
+    #                           Mlflow artifacts storage                           #
+    # ---------------------------------------------------------------------------- #
     if args.mlflow_store:
         # Code for send output and tensorboard to mlflow artifacts.
         mlflow.log_artifacts(
@@ -403,8 +428,9 @@
                 local_dir=args.tensorboard + '/' + name + '/',
                 artifact_path=os.path.abspath(args.tensorboard).split('/')[-1] + '/' + name + '/')
 
-    # Store all results if remote_store flag is True (Google Cloud Bucket for
-    # experiments)
+    # ---------------------------------------------------------------------------- #
+    #                          Google Cloud Bucket Storage                         #
+    # ---------------------------------------------------------------------------- #
     if args.remote_store:
         # Initiate Google Cloud client
         client = gcloud.init_storage_client()
@@ -436,8 +462,9 @@
     # End mlflow run
     mlflow.end_run()
 
-    # If it is a Google Cloud VM and experiment flag auto_delete has been
-    # activated, shutdown remote machine when ends
+    # ---------------------------------------------------------------------------- #
+    #                   Autodelete option if is a cloud resource                   #
+    # ---------------------------------------------------------------------------- #
     if args.group_name and args.auto_delete:
         token = gcloud.get_service_account_token()
         gcloud.delete_instance_MIG_from_container(args.group_name, token)
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,2 +1,3 @@
 include requirements.txt
-include sinergym/version.txt
+include sinergym/version.txt
+recursive-include sinergym/data *
diff --git a/sinergym/data/buildings/5ZoneAutoDXVAV.idf b/sinergym/data/buildings/5ZoneAutoDXVAV.idf
@@ -160,7 +160,7 @@
     1,                       !- Begin Month
     1,                       !- Begin Day of Month
     ,                        !- Begin Year
-    3,                       !- End Month
+    12,                       !- End Month
     31,                      !- End Day of Month
     ,                        !- End Year
     Monday,                  !- Day of Week for Start Day

diff --git a/sinergym/envs/eplus_env.py b/sinergym/envs/eplus_env.py
@@ -119,6 +119,7 @@ def __init__(
 
         # Reward class
         self.cls_reward = reward
+        self.obs_dict = None
 
     def step(self,
              action: Union[int,
@@ -166,19 +167,19 @@ def step(self,
         self.simulator.logger_main.debug(action_)
         time_info, obs, done = self.simulator.step(action_)
         # Create dictionary with observation
-        obs_dict = dict(zip(self.variables['observation'], obs))
+        self.obs_dict = dict(zip(self.variables['observation'], obs))
         # Add current timestep information
-        obs_dict['day'] = time_info[0]
-        obs_dict['month'] = time_info[1]
-        obs_dict['hour'] = time_info[2]
+        self.obs_dict['day'] = time_info[0]
+        self.obs_dict['month'] = time_info[1]
+        self.obs_dict['hour'] = time_info[2]
 
         # Calculate reward
 
         # Calculate temperature mean for all building zones
-        temp_values = [value for key, value in obs_dict.items(
+        temp_values = [value for key, value in self.obs_dict.items(
         ) if key.startswith('Zone Air Temperature')]
 
-        power = obs_dict['Facility Total HVAC Electricity Demand Rate (Whole Building)']
+        power = self.obs_dict['Facility Total HVAC Electricity Demand Rate (Whole Building)']
         reward, terms = self.cls_reward.calculate(
             power, temp_values, time_info[1], time_info[0])
 
@@ -187,17 +188,17 @@ def step(self,
             'timestep': int(
                 time_info[3] / self.simulator._eplus_run_stepsize),
             'time_elapsed': int(time_info[3]),
-            'day': obs_dict['day'],
-            'month': obs_dict['month'],
-            'hour': obs_dict['hour'],
+            'day': self.obs_dict['day'],
+            'month': self.obs_dict['month'],
+            'hour': self.obs_dict['hour'],
             'total_power': power,
             'total_power_no_units': terms['reward_energy'],
             'comfort_penalty': terms['reward_comfort'],
             'temperatures': temp_values,
-            'out_temperature': obs_dict['Site Outdoor Air Drybulb Temperature (Environment)'],
+            'out_temperature': self.obs_dict['Site Outdoor Air Drybulb Temperature (Environment)'],
             'action_': action_}
 
-        return np.array(list(obs_dict.values()),
+        return np.array(list(self.obs_dict.values()),
                         dtype=np.float32), reward, done, info
 
     def reset(self) -> np.ndarray:
@@ -208,13 +209,13 @@ def reset(self) -> np.ndarray:
         """
         # Change to next episode
         time_info, obs, done = self.simulator.reset(self.weather_variability)
-        obs_dict = dict(zip(self.variables['observation'], obs))
+        self.obs_dict = dict(zip(self.variables['observation'], obs))
 
-        obs_dict['day'] = time_info[0]
-        obs_dict['month'] = time_info[1]
-        obs_dict['hour'] = time_info[2]
+        self.obs_dict['day'] = time_info[0]
+        self.obs_dict['month'] = time_info[1]
+        self.obs_dict['hour'] = time_info[2]
 
-        return np.array(list(obs_dict.values()), dtype=np.float32)
+        return np.array(list(self.obs_dict.values()), dtype=np.float32)
 
     def render(self, mode: str = 'human') -> None:
         """Environment rendering.

diff --git a/sinergym/simulators/eplus.py b/sinergym/simulators/eplus.py
@@ -24,7 +24,7 @@
 from sinergym.utils.config import Config
 
 LOG_LEVEL_MAIN = 'INFO'
-LOG_LEVEL_EPLS = 'ERROR'
+LOG_LEVEL_EPLS = 'FATAL'
 LOG_FMT = "[%(asctime)s] %(name)s %(levelname)s:%(message)s"
 
 
@@ -150,11 +150,12 @@ def reset(
         # End the last episode if exists
         if self._episode_existed:
             self._end_episode()
-            self.logger_main.info('Last EnergyPlus process has been closed. ')
+            self.logger_main.info(
+                'EnergyPlus episode completed successfully. ')
             self._epi_num += 1
 
         # Create EnergyPlus simulation process
-        self.logger_main.info('Creating EnergyPlus simulation environment...')
+        self.logger_main.info('Creating new EnergyPlus simulation episode...')
         # Creating episode working dir
         eplus_working_dir = self._config.set_episode_working_dir()
         # Getting IDF, WEATHER, VARIABLES and OUTPUT path for current episode
@@ -401,6 +402,8 @@ def end_env(self) -> None:
         self._end_episode()
         # self._socket.shutdown(socket.SHUT_RDWR);
         self._socket.close()
+        self.logger_main.info(
+            'EnergyPlus simulation closed successfully. ')
 
     def end_episode(self) -> None:
         """It ends current simulator episode."""