diff --git a/.github/workflows/merge_pr.yml b/.github/workflows/merge_pr.yml index 48eda14252..f1508a7263 100644 --- a/.github/workflows/merge_pr.yml +++ b/.github/workflows/merge_pr.yml @@ -100,7 +100,7 @@ jobs: uses: actions/checkout@v2 - name: Build the latest Docker image if: "${{ env.DOCKER_USER != '' && env.DOCKER_PASSWORD != '' }}" - run: docker build . --file Dockerfile --build-arg SINERGYM_EXTRAS=[extra] --tag $DOCKER_USER/sinergym:latest + run: docker build . --file Dockerfile --build-arg SINERGYM_EXTRAS=[extras] --tag $DOCKER_USER/sinergym:latest - name: Login in Docker Hub account if: "${{ env.DOCKER_USER != '' && env.DOCKER_PASSWORD != '' }}" run: docker login -u $DOCKER_USER -p $DOCKER_PASSWORD diff --git a/.gitignore b/.gitignore index 4b0c32d441..4d11b52145 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,9 @@ __pycache__/ .ipynb_checkpoints *.egg-info +#vscode conf +.vscode + # Other softwares EnergyPlus*/ bcvtb/ diff --git a/.vscode/settings.json b/.vscode_conf/settings.json similarity index 81% rename from .vscode/settings.json rename to .vscode_conf/settings.json index be7372fcf0..cb95f6ee55 100644 --- a/.vscode/settings.json +++ b/.vscode_conf/settings.json @@ -1,12 +1,12 @@ { - "python.linting.pycodestyleEnabled": true, + "python.linting.pycodestyleEnabled": false, "python.formatting.provider": "autopep8", "python.formatting.autopep8Args": [ "--aggressive", "--aggressive" ], "editor.formatOnSave": true, - "window.zoomLevel": 2, + "window.zoomLevel": 0, "python.testing.pytestArgs": [ "tests" ], diff --git a/DRL_battery.py b/DRL_battery.py index 6ad2be372c..e6397ee52d 100644 --- a/DRL_battery.py +++ b/DRL_battery.py @@ -19,7 +19,9 @@ from sinergym.utils.wrappers import (LoggerWrapper, MultiObsWrapper, NormalizeObservation) -#--------------------------------BATTERY ARGUMENTS DEFINITION---------------------------------# +# ---------------------------------------------------------------------------- # +# Parameters definition # +# ---------------------------------------------------------------------------- # parser = argparse.ArgumentParser() # commons arguments for battery parser.add_argument( @@ -149,15 +151,20 @@ parser.add_argument('--sigma', '-sig', type=float, default=0.1) args = parser.parse_args() -#---------------------------------------------------------------------------------------------# -# register run name +#------------------------------------------------------------------------------# + +# ---------------------------------------------------------------------------- # +# Register run name # +# ---------------------------------------------------------------------------- # experiment_date = datetime.today().strftime('%Y-%m-%d %H:%M') name = args.algorithm + '-' + args.environment + \ '-episodes_' + str(args.episodes) if args.seed: name += '-seed_' + str(args.seed) name += '(' + experiment_date + ')' -# Check if MLFLOW_TRACKING_URI is defined +# ---------------------------------------------------------------------------- # +# Check if MLFLOW_TRACKING_URI is defined # +# ---------------------------------------------------------------------------- # mlflow_tracking_uri = os.environ.get('MLFLOW_TRACKING_URI') if mlflow_tracking_uri is not None: # Check ping to server @@ -199,25 +206,25 @@ mlflow.log_param('tau', args.tau) mlflow.log_param('sigma', args.sigma) - # Environment construction (with reward specified) + # ---------------------------------------------------------------------------- # + # Environment construction (with reward specified) # + # 
---------------------------------------------------------------------------- # if args.reward == 'linear': - env = gym.make(args.environment, reward=LinearReward()) + reward = LinearReward() elif args.reward == 'exponential': - env = gym.make(args.environment, reward=ExpReward()) + reward = ExpReward() else: raise RuntimeError('Reward function specified is not registered.') - # env wrappers (optionals) - if args.normalization: - env = NormalizeObservation(env) - if args.logger: - env = LoggerWrapper(env) - if args.multiobs: - env = MultiObsWrapper(env) - - ######################## TRAINING ######################## + env = gym.make(args.environment, reward=reward) + # env for evaluation if is enabled + eval_env = None + if args.evaluation: + eval_env = gym.make(args.environment, reward=reward) - # env wrappers (optionals) + # ---------------------------------------------------------------------------- # + # Wrappers # + # ---------------------------------------------------------------------------- # if args.normalization: # We have to know what dictionary ranges to use norm_range = None @@ -231,11 +238,19 @@ else: raise NameError('env_type is not valid, check environment name') env = NormalizeObservation(env, ranges=norm_range) + if eval_env is not None: + eval_env = NormalizeObservation(eval_env, ranges=norm_range) if args.logger: env = LoggerWrapper(env) + if eval_env is not None: + eval_env = LoggerWrapper(eval_env) if args.multiobs: env = MultiObsWrapper(env) - # Defining model(algorithm) + if eval_env is not None: + eval_env = MultiObsWrapper(eval_env) + # ---------------------------------------------------------------------------- # + # Defining model(algorithm) # + # ---------------------------------------------------------------------------- # model = None #--------------------------DQN---------------------------# if args.algorithm == 'DQN': @@ -345,21 +360,22 @@ raise RuntimeError('Algorithm specified is not registered.') #--------------------------------------------------------# - # Calculating n_timesteps_episode for training + # ---------------------------------------------------------------------------- # + # Calculating total training timesteps based on number of episodes # + # ---------------------------------------------------------------------------- # n_timesteps_episode = env.simulator._eplus_one_epi_len / \ env.simulator._eplus_run_stepsize - timesteps = args.episodes * n_timesteps_episode + timesteps = args.episodes * n_timesteps_episode - 1 - # For callbacks processing - env_vec = DummyVecEnv([lambda: env]) - - # Using Callbacks for training + # ---------------------------------------------------------------------------- # + # CALLBACKS # + # ---------------------------------------------------------------------------- # callbacks = [] # Set up Evaluation and saving best model if args.evaluation: eval_callback = LoggerEvalCallback( - env_vec, + eval_env, best_model_save_path='best_model/' + name + '/', log_path='best_model/' + name + '/', eval_freq=n_timesteps_episode * @@ -380,14 +396,23 @@ callback = CallbackList(callbacks) - # Training + # ---------------------------------------------------------------------------- # + # TRAINING # + # ---------------------------------------------------------------------------- # model.learn( total_timesteps=timesteps, callback=callback, log_interval=args.log_interval) model.save(env.simulator._env_working_dir_parent + '/' + name) - # If mlflow artifacts store is active + # If Algorithm doesn't reset or close environment, this script 
will do it in + order to correctly log all simulation data (Energyplus + Sinergym logs) + if env.simulator._episode_existed: + env.close() + + # ---------------------------------------------------------------------------- # + # Mlflow artifacts storage # + # ---------------------------------------------------------------------------- # if args.mlflow_store: # Code for sending output and tensorboard to mlflow artifacts. mlflow.log_artifacts( @@ -403,8 +428,9 @@ local_dir=args.tensorboard + '/' + name + '/', artifact_path=os.path.abspath(args.tensorboard).split('/')[-1] + '/' + name + '/') - # Store all results if remote_store flag is True (Google Cloud Bucket for - # experiments) + # ---------------------------------------------------------------------------- # + # Google Cloud Bucket Storage # + # ---------------------------------------------------------------------------- # if args.remote_store: # Initiate Google Cloud client client = gcloud.init_storage_client() @@ -436,8 +462,9 @@ # End mlflow run mlflow.end_run() - # If it is a Google Cloud VM and experiment flag auto_delete has been - # activated, shutdown remote machine when ends + # ---------------------------------------------------------------------------- # + # Autodelete option if it is a cloud resource # + # ---------------------------------------------------------------------------- # if args.group_name and args.auto_delete: token = gcloud.get_service_account_token() gcloud.delete_instance_MIG_from_container(args.group_name, token) diff --git a/MANIFEST.in b/MANIFEST.in index b56478f723..2c47f151e0 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ include requirements.txt -include sinergym/version.txt \ No newline at end of file +include sinergym/version.txt +recursive-include sinergym/data * \ No newline at end of file diff --git a/sinergym/data/buildings/5ZoneAutoDXVAV.idf b/sinergym/data/buildings/5ZoneAutoDXVAV.idf index fdaacf8b7f..6a9e00badb 100644 --- a/sinergym/data/buildings/5ZoneAutoDXVAV.idf +++ b/sinergym/data/buildings/5ZoneAutoDXVAV.idf @@ -160,7 +160,7 @@ 1, !- Begin Month 1, !- Begin Day of Month , !- Begin Year - 3, !- End Month + 12, !- End Month 31, !- End Day of Month , !- End Year Monday, !- Day of Week for Start Day diff --git a/sinergym/envs/eplus_env.py b/sinergym/envs/eplus_env.py index e78dce5ff7..636cc037ff 100644 --- a/sinergym/envs/eplus_env.py +++ b/sinergym/envs/eplus_env.py @@ -119,6 +119,7 @@ def __init__( # Reward class self.cls_reward = reward + self.obs_dict = None def step(self, action: Union[int, @@ -166,19 +167,19 @@ def step(self, self.simulator.logger_main.debug(action_) time_info, obs, done = self.simulator.step(action_) # Create dictionary with observation - obs_dict = dict(zip(self.variables['observation'], obs)) + self.obs_dict = dict(zip(self.variables['observation'], obs)) # Add current timestep information - obs_dict['day'] = time_info[0] - obs_dict['month'] = time_info[1] - obs_dict['hour'] = time_info[2] + self.obs_dict['day'] = time_info[0] + self.obs_dict['month'] = time_info[1] + self.obs_dict['hour'] = time_info[2] # Calculate reward # Calculate temperature mean for all building zones - temp_values = [value for key, value in obs_dict.items( + temp_values = [value for key, value in self.obs_dict.items( ) if key.startswith('Zone Air Temperature')] - power = obs_dict['Facility Total HVAC Electricity Demand Rate (Whole Building)'] + power = self.obs_dict['Facility Total HVAC Electricity Demand Rate (Whole Building)'] reward, terms = self.cls_reward.calculate( power,
temp_values, time_info[1], time_info[0]) @@ -187,17 +188,17 @@ def step(self, 'timestep': int( time_info[3] / self.simulator._eplus_run_stepsize), 'time_elapsed': int(time_info[3]), - 'day': obs_dict['day'], - 'month': obs_dict['month'], - 'hour': obs_dict['hour'], + 'day': self.obs_dict['day'], + 'month': self.obs_dict['month'], + 'hour': self.obs_dict['hour'], 'total_power': power, 'total_power_no_units': terms['reward_energy'], 'comfort_penalty': terms['reward_comfort'], 'temperatures': temp_values, - 'out_temperature': obs_dict['Site Outdoor Air Drybulb Temperature (Environment)'], + 'out_temperature': self.obs_dict['Site Outdoor Air Drybulb Temperature (Environment)'], 'action_': action_} - return np.array(list(obs_dict.values()), + return np.array(list(self.obs_dict.values()), dtype=np.float32), reward, done, info def reset(self) -> np.ndarray: @@ -208,13 +209,13 @@ def reset(self) -> np.ndarray: """ # Change to next episode time_info, obs, done = self.simulator.reset(self.weather_variability) - obs_dict = dict(zip(self.variables['observation'], obs)) + self.obs_dict = dict(zip(self.variables['observation'], obs)) - obs_dict['day'] = time_info[0] - obs_dict['month'] = time_info[1] - obs_dict['hour'] = time_info[2] + self.obs_dict['day'] = time_info[0] + self.obs_dict['month'] = time_info[1] + self.obs_dict['hour'] = time_info[2] - return np.array(list(obs_dict.values()), dtype=np.float32) + return np.array(list(self.obs_dict.values()), dtype=np.float32) def render(self, mode: str = 'human') -> None: """Environment rendering. diff --git a/sinergym/simulators/eplus.py b/sinergym/simulators/eplus.py index 5613752660..61f09090c9 100644 --- a/sinergym/simulators/eplus.py +++ b/sinergym/simulators/eplus.py @@ -24,7 +24,7 @@ from sinergym.utils.config import Config LOG_LEVEL_MAIN = 'INFO' -LOG_LEVEL_EPLS = 'ERROR' +LOG_LEVEL_EPLS = 'FATAL' LOG_FMT = "[%(asctime)s] %(name)s %(levelname)s:%(message)s" @@ -150,11 +150,12 @@ def reset( # End the last episode if exists if self._episode_existed: self._end_episode() - self.logger_main.info('Last EnergyPlus process has been closed. ') + self.logger_main.info( + 'EnergyPlus episode completed successfully. ') self._epi_num += 1 # Create EnergyPlus simulation process - self.logger_main.info('Creating EnergyPlus simulation environment...') + self.logger_main.info('Creating new EnergyPlus simulation episode...') # Creating episode working dir eplus_working_dir = self._config.set_episode_working_dir() # Getting IDF, WEATHER, VARIABLES and OUTPUT path for current episode @@ -401,6 +402,8 @@ def end_env(self) -> None: self._end_episode() # self._socket.shutdown(socket.SHUT_RDWR); self._socket.close() + self.logger_main.info( + 'EnergyPlus simulation closed successfully. 
') def end_episode(self) -> None: """It ends current simulator episode.""" diff --git a/sinergym/utils/callbacks.py b/sinergym/utils/callbacks.py index e77775ead3..37b67cde02 100644 --- a/sinergym/utils/callbacks.py +++ b/sinergym/utils/callbacks.py @@ -1,17 +1,16 @@ """Custom Callbacks for stable baselines 3 algorithms.""" import os -import warnings -from typing import Any, Callable, Dict, Optional, Union +from typing import Optional, Union import gym import numpy as np -from stable_baselines3.common import base_class from stable_baselines3.common.callbacks import BaseCallback, EvalCallback from stable_baselines3.common.env_util import is_wrapped from stable_baselines3.common.vec_env import VecEnv, sync_envs_normalization from sinergym.utils.wrappers import LoggerWrapper, NormalizeObservation +from sinergym.utils.evaluation import evaluate_policy class LoggerCallback(BaseCallback): @@ -26,7 +25,6 @@ class LoggerCallback(BaseCallback): def __init__(self, sinergym_logger=False, verbose=0): """Custom callback for plotting additional values in tensorboard. - Args: sinergym_logger (boolean): Indicate if CSVLogger inner Sinergym will be activated or not. """ @@ -184,7 +182,6 @@ class LoggerEvalCallback(EvalCallback): :param render: Whether to render or not the environment during evaluation :param verbose: :param warn: Passed to ``evaluate_policy`` (warns if ``eval_env`` has not been wrapped with a Monitor wrapper) - """ def __init__( @@ -227,28 +224,30 @@ def _on_step(self) -> bool: # Reset success rate buffer self._is_success_buffer = [] - - episodes_rewards, episodes_lengths, episodes_powers, episodes_comfort_violations, episodes_comfort_penalties, episodes_power_penalties = evaluate_policy( + #episodes_rewards, episodes_lengths, episodes_powers, episodes_comfort_violations, episodes_comfort_penalties, episodes_power_penalties + episodes_data = evaluate_policy( self.model, self.eval_env, n_eval_episodes=self.n_eval_episodes, render=self.render, deterministic=self.deterministic, - return_episode_rewards=True, - warn=self.warn, callback=None, ) if self.log_path is not None: self.evaluations_timesteps.append(self.num_timesteps) - self.evaluations_results.append(episodes_rewards) - self.evaluations_length.append(episodes_lengths) - self.evaluations_power_consumption.append(episodes_powers) + self.evaluations_results.append( + episodes_data['episodes_rewards']) + self.evaluations_length.append( + episodes_data['episodes_lengths']) + self.evaluations_power_consumption.append( + episodes_data['episodes_powers']) self.evaluations_comfort_violation.append( - episodes_comfort_violations) + episodes_data['episodes_comfort_violations']) self.evaluations_comfort_penalty.append( - episodes_comfort_penalties) - self.evaluations_power_penalty.append(episodes_power_penalties) + episodes_data['episodes_comfort_penalties']) + self.evaluations_power_penalty.append( + episodes_data['episodes_power_penalties']) kwargs = {} # Save success log if present @@ -269,21 +268,23 @@ def _on_step(self) -> bool: ) mean_reward, std_reward = np.mean( - episodes_rewards), np.std(episodes_rewards) + episodes_data['episodes_rewards']), np.std( + episodes_data['episodes_rewards']) mean_ep_length, std_ep_length = np.mean( - episodes_lengths), np.std(episodes_lengths) - - self.evaluation_metrics['cumulative_reward'] = np.mean( - mean_reward) - self.evaluation_metrics['ep_length'] = mean_ep_length - self.evaluation_metrics['power_consumption'] = np.mean( - episodes_powers) + episodes_data['episodes_lengths']), np.std( + 
episodes_data['episodes_lengths']) + + self.evaluation_metrics['mean_rewards'] = mean_reward + self.evaluation_metrics['std_rewards'] = std_reward + self.evaluation_metrics['mean_ep_length'] = mean_ep_length + self.evaluation_metrics['mean_power_consumption'] = np.mean( + episodes_data['episodes_powers']) self.evaluation_metrics['comfort_violation(%)'] = np.mean( - episodes_comfort_violations) + episodes_data['episodes_comfort_violations']) self.evaluation_metrics['comfort_penalty'] = np.mean( - episodes_comfort_penalties) + episodes_data['episodes_comfort_penalties']) self.evaluation_metrics['power_penalty'] = np.mean( - episodes_power_penalties) + episodes_data['episodes_power_penalties']) if self.verbose > 0: print( @@ -313,125 +314,3 @@ def _on_step(self) -> bool: return self._on_event() return True - - -def evaluate_policy(model: "base_class.BaseAlgorithm", - env: Union[gym.Env, - VecEnv], - n_eval_episodes: int = 5, - deterministic: bool = True, - render: bool = False, - callback: Optional[Callable[[Dict[str, - Any], - Dict[str, - Any]], - None]] = None, - reward_threshold: Optional[float] = None, - return_episode_rewards: bool = False, - warn: bool = True) -> Any: - """Runs policy for ``n_eval_episodes`` episodes and returns average reward. This is made to work only with one env. - .. note:: If environment has not been wrapped with ``Monitor`` wrapper, reward and - episode lengths are counted as it appears with ``env.step`` calls. If - the environment contains wrappers that modify rewards or episode lengths - (e.g. reward scaling, early episode reset), these will affect the evaluation - results as well. You can avoid this by wrapping environment with ``Monitor`` - wrapper before anything else. - :param model: The RL agent you want to evaluate. - :param env: The gym environment. In the case of a ``VecEnv`` this must contain only one environment. - :param n_eval_episodes: Number of episode to evaluate the agent - :param deterministic: Whether to use deterministic or stochastic actions - :param render: Whether to render the environment or not - :param callback: callback function to do additional checks, called after each step. Gets locals() and globals() passed as parameters. - :param reward_threshold: Minimum expected reward per episode, this will raise an error if the performance is not met - :param return_episode_rewards: If True, a list of rewards and episode lengths per episode will be returned instead of the mean. - :param warn: If True (default), warns user about lack of a Monitor wrapper in the evaluation environment. - :return: Mean reward per episode, std of reward per episode. - Returns ([float], [int]) when ``return_episode_rewards`` is True, first - list containing per-episode rewards and second containing per-episode lengths - (in number of steps). - - """ - - is_monitor_wrapped = False - # Avoid circular import - from stable_baselines3.common.env_util import is_wrapped - from stable_baselines3.common.monitor import Monitor - - if isinstance(env, VecEnv): - assert env.num_envs == 1, "You must pass only one environment when using this function" - is_monitor_wrapped = env.env_is_wrapped(Monitor)[0] - else: - is_monitor_wrapped = is_wrapped(env, Monitor) - - if not is_monitor_wrapped and warn: - warnings.warn( - "Evaluation environment is not wrapped with a ``Monitor`` wrapper. " - "This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. 
" - "Consider wrapping environment first with ``Monitor`` wrapper.", UserWarning, ) - - episodes_rewards, episodes_lengths, episodes_powers, episodes_comfort_violations, episodes_comfort_penalties, episodes_power_penalties = [], [], [], [], [], [] - not_reseted = True - while len(episodes_rewards) < n_eval_episodes: - # Number of loops here might differ from true episodes - # played, if underlying wrappers modify episode lengths. - # Avoid double reset, as VecEnv are reset automatically. - if not isinstance(env, VecEnv) or not_reseted: - obs = env.reset() - not_reseted = False - done, state = False, None - episode_reward = 0.0 - episode_length = 0 - episode_steps_comfort_violation = 0 - episode_power = 0.0 - episode_comfort_penalty = 0.0 - episode_power_penalty = 0.0 - while not done: - action, state = model.predict( - obs, state=state, deterministic=deterministic) - obs, reward, done, info = env.step(action) - episode_reward += reward - episode_power += info[0]['total_power'] - episode_power_penalty += info[0]['total_power_no_units'] - episode_comfort_penalty += info[0]['comfort_penalty'] - if info[0]['comfort_penalty'] != 0: - episode_steps_comfort_violation += 1 - if callback is not None: - callback(locals(), globals()) - episode_length += 1 - if render: - env.render() - - if is_monitor_wrapped: - # Do not trust "done" with episode endings. - # Remove vecenv stacking (if any) - if isinstance(env, VecEnv): - info = info[0] - if "episode" in info.keys(): - # Monitor wrapper includes "episode" key in info if environment - # has been wrapped with it. Use those rewards instead. - episodes_rewards.append(info["episode"]["r"]) - episodes_lengths.append(info["episode"]["l"]) - else: - episodes_rewards.append(episode_reward) - episodes_lengths.append(episode_length) - episodes_powers.append(episode_power) - try: - episodes_comfort_violations.append( - episode_steps_comfort_violation / episode_length * 100) - except ZeroDivisionError: - episodes_comfort_violations.append(np.nan) - episodes_comfort_penalties.append(episode_comfort_penalty) - episodes_power_penalties.append(episode_power_penalty) - - mean_reward = np.mean(episodes_rewards) - std_reward = np.std(episodes_rewards) - # mean_power = np.mean(episodes_powers) - # std_power = np.std(episodes_powers) - # mean_comfort_violation= np.mean(episodes_comfort_violations) - # std_comfort_violation= np.std(episodes_comfort_violations) - if reward_threshold is not None: - assert mean_reward > reward_threshold, "Mean reward below threshold: " f"{mean_reward:.2f} < {reward_threshold:.2f}" - if return_episode_rewards: - return episodes_rewards, episodes_lengths, episodes_powers, episodes_comfort_violations, episodes_comfort_penalties, episodes_power_penalties - else: - return mean_reward, std_reward diff --git a/sinergym/utils/evaluation.py b/sinergym/utils/evaluation.py new file mode 100644 index 0000000000..28599245b9 --- /dev/null +++ b/sinergym/utils/evaluation.py @@ -0,0 +1,106 @@ +"""Custom policy evaluations for Evaluation Callbacks.""" + +from itertools import accumulate +import warnings +from typing import Any, Callable, Dict, Optional, Union + +import gym +import numpy as np +from stable_baselines3.common import base_class +from stable_baselines3.common.vec_env import VecEnv + + +def evaluate_policy(model: "base_class.BaseAlgorithm", + env: Union[gym.Env, + VecEnv], + n_eval_episodes: int = 5, + deterministic: bool = True, + render: bool = False, + callback: Optional[Callable[[Dict[str, + Any], + Dict[str, + Any]], + None]] = None, + ) -> 
Any: + """Runs the policy for ``n_eval_episodes`` episodes and returns the data collected in each evaluation episode. This is made to work only with one env. + .. note:: If the environment has not been wrapped with a ``Monitor`` wrapper, rewards and + episode lengths are counted as they appear in ``env.step`` calls. If + the environment contains wrappers that modify rewards or episode lengths + (e.g. reward scaling, early episode reset), these will affect the evaluation + results as well. You can avoid this by wrapping the environment with a ``Monitor`` + wrapper before anything else. + :param model: The RL agent you want to evaluate. + :param env: The gym environment. In the case of a ``VecEnv`` this must contain only one environment. + :param n_eval_episodes: Number of episodes to evaluate the agent + :param deterministic: Whether to use deterministic or stochastic actions + :param render: Whether to render the environment or not + :param callback: callback function to do additional checks, called after each step. Gets locals() and globals() passed as parameters. + :return: Dictionary with the per-episode lists ``episodes_rewards``, ``episodes_lengths``, + ``episodes_powers``, ``episodes_comfort_violations``, ``episodes_comfort_penalties`` + and ``episodes_power_penalties``. + + """ + result = { + 'episodes_rewards': [], + 'episodes_lengths': [], + 'episodes_powers': [], + 'episodes_comfort_violations': [], + 'episodes_comfort_penalties': [], + 'episodes_power_penalties': [] + } + episodes_executed = 0 + not_reseted = True + while episodes_executed < n_eval_episodes: + # Number of loops here might differ from true episodes + # played, if underlying wrappers modify episode lengths. + # Avoid double reset, as VecEnv are reset automatically.
+ if not isinstance(env, VecEnv) or not_reseted: + # obs = list(map( + # lambda obs_dict: np.array(list(obs_dict.values()), dtype=np.float32), + # env.get_attr('obs_dict'))) + obs = env.reset() + not_reseted = False + done, state = False, None + episode_reward = 0.0 + episode_length = 0 + episode_steps_comfort_violation = 0 + episode_power = 0.0 + episode_comfort_penalty = 0.0 + episode_power_penalty = 0.0 + # ---------------------------------------------------------------------------- # + # Running episode and accumulate values # + # ---------------------------------------------------------------------------- # + while not done: + action, state = model.predict( + obs, state=state, deterministic=deterministic) + obs, reward, done, info = env.step(action) + episode_reward += reward + episode_power += info[0]['total_power'] + episode_power_penalty += info[0]['total_power_no_units'] + episode_comfort_penalty += info[0]['comfort_penalty'] + if info[0]['comfort_penalty'] != 0: + episode_steps_comfort_violation += 1 + if callback is not None: + callback(locals(), globals()) + episode_length += 1 + if render: + env.render() + episodes_executed += 1 + # ---------------------------------------------------------------------------- # + # Storing accumulated values in result # + # ---------------------------------------------------------------------------- # + result['episodes_rewards'].append(episode_reward) + result['episodes_lengths'].append(episode_length) + result['episodes_powers'].append(episode_power) + try: + result['episodes_comfort_violations'].append( + episode_steps_comfort_violation / episode_length * 100) + except ZeroDivisionError: + result['episodes_comfort_violations'].append(np.nan) + result['episodes_comfort_penalties'].append(episode_comfort_penalty) + result['episodes_power_penalties'].append(episode_power_penalty) + + return result diff --git a/tests/test_config.py b/tests/test_config.py index d154b776d6..abac3082aa 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -127,12 +127,12 @@ def test_apply_weather_variability(config): def test_get_eplus_run_info(config): info = config._get_eplus_run_info() - assert info == (1, 1, 0, 3, 31, 0, 0, 4) + assert info == (1, 1, 0, 12, 31, 0, 0, 4) def test_get_one_epi_len(config): total_time = config._get_one_epi_len() - assert total_time == 7776000 + assert total_time == 31536000 def test_set_experiment_working_dir(config): diff --git a/tests/test_simulator.py b/tests/test_simulator.py index c6c2b46056..c2bb41d33f 100644 --- a/tests/test_simulator.py +++ b/tests/test_simulator.py @@ -27,7 +27,7 @@ def test_reset(simulator): # Checking simulator state assert simulator._eplus_run_stepsize == 900 - assert simulator._eplus_one_epi_len == 7776000 + assert simulator._eplus_one_epi_len == 31536000 assert simulator._curSimTim == 0 assert simulator._env_working_dir_parent.split( '/')[-1] == 'Eplus-env-' + simulator._env_name + '-res1'
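
Below is a minimal sketch of the evaluation-environment pattern that the patched DRL_battery.py follows: build a second environment with the same reward class and mirror every wrapper on it. The environment id and the `sinergym.utils.rewards` import path are assumptions for illustration, not values taken from this patch.

```python
import gym

from sinergym.utils.rewards import LinearReward  # assumed import path for the reward classes
from sinergym.utils.wrappers import LoggerWrapper

ENV_ID = 'Eplus-demo-v1'  # hypothetical id; DRL_battery.py uses args.environment

reward = LinearReward()
env = gym.make(ENV_ID, reward=reward)       # training environment
eval_env = gym.make(ENV_ID, reward=reward)  # separate instance used only for evaluation

# Mirror every wrapper on the evaluation environment, as DRL_battery.py now does,
# so the observations and logs seen during evaluation match training.
env = LoggerWrapper(env)
eval_env = LoggerWrapper(eval_env)
```

Passing the plain `eval_env` to `LoggerEvalCallback` works because it extends stable-baselines3's `EvalCallback`, which wraps a non-vectorized environment in a `DummyVecEnv` internally.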
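
The relocated `evaluate_policy` in `sinergym/utils/evaluation.py` now returns a single dictionary of per-episode lists instead of a tuple. A minimal usage sketch, assuming a trained stable-baselines3 `model` and a wrapped Sinergym `eval_env` already exist; the environment is put in a `DummyVecEnv` because the helper indexes `info[0]`, which matches how it is called from the evaluation callback:

```python
import numpy as np
from stable_baselines3.common.vec_env import DummyVecEnv

from sinergym.utils.evaluation import evaluate_policy

# `model` and `eval_env` are assumed to exist already (hypothetical objects).
vec_eval_env = DummyVecEnv([lambda: eval_env])

episodes_data = evaluate_policy(
    model,
    vec_eval_env,
    n_eval_episodes=2,
    deterministic=True,
)

# Every key holds one value per evaluation episode.
mean_reward = np.mean(episodes_data['episodes_rewards'])
std_reward = np.std(episodes_data['episodes_rewards'])
mean_power = np.mean(episodes_data['episodes_powers'])
comfort_violation = np.mean(episodes_data['episodes_comfort_violations'])

print(f'reward: {mean_reward:.2f} +/- {std_reward:.2f} | '
      f'power: {mean_power:.2f} | comfort violation: {comfort_violation:.2f}%')
```

This mirrors what `LoggerEvalCallback._on_step` now does with the returned dictionary before logging `mean_rewards`, `std_rewards`, `mean_power_consumption` and the other evaluation metrics.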