From 8ceda3b3b20759ab0a0e802d6428413681c57aee Mon Sep 17 00:00:00 2001
From: Antonin RAFFIN
Date: Wed, 31 Jul 2019 13:10:56 +0200
Subject: [PATCH] Release 2.7.0 (#426)

* Release 2.7.0

* Update changelog.rst

* Update tensorboard.rst
---
 docs/guide/algos.rst         |  5 +++++
 docs/guide/tensorboard.rst   | 36 ++++++++++++++++++++++++++++++++++++
 docs/misc/changelog.rst      | 18 +++++++++---------
 setup.py                     |  4 ++--
 stable_baselines/__init__.py |  2 +-
 5 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/docs/guide/algos.rst b/docs/guide/algos.rst
index d3c29b15c5..cea0669fa1 100644
--- a/docs/guide/algos.rst
+++ b/docs/guide/algos.rst
@@ -48,3 +48,8 @@ Actions ``gym.spaces``:
 - ``MultiBinary``: A list of possible actions, where each timestep any of the actions can be used in any combination.
 
 .. _MPI: https://mpi4py.readthedocs.io/en/stable/
+
+.. note::
+
+    Some logging values (like `ep_rewmean`, `eplenmean`) are only available when using a Monitor wrapper.
+    See `Issue #339 `_ for more info.
diff --git a/docs/guide/tensorboard.rst b/docs/guide/tensorboard.rst
index 8a13fc6999..2795c590c3 100644
--- a/docs/guide/tensorboard.rst
+++ b/docs/guide/tensorboard.rst
@@ -76,6 +76,42 @@ It will display information such as the model graph, the episode reward, the mod
    :width: 400
    :alt: graph
 
+
+Logging More Values
+-------------------
+
+Using a callback, you can easily log more values with TensorBoard.
+Here is a simple example of how to log both an additional tensor and an arbitrary scalar value:
+
+.. code-block:: python
+
+    import tensorflow as tf
+    import numpy as np
+
+    from stable_baselines import SAC
+
+    model = SAC("MlpPolicy", "Pendulum-v0", tensorboard_log="/tmp/sac/", verbose=1)
+    # Define a new property to avoid global variable
+    model.is_tb_set = False
+
+
+    def callback(locals_, globals_):
+        self_ = locals_['self']
+        # Log additional tensor
+        if not self_.is_tb_set:
+            with self_.graph.as_default():
+                tf.summary.scalar('value_target', tf.reduce_mean(self_.value_target))
+                self_.summary = tf.summary.merge_all()
+            self_.is_tb_set = True
+        # Log scalar value (here a random variable)
+        value = np.random.random()
+        summary = tf.Summary(value=[tf.Summary.Value(tag='random_value', simple_value=value)])
+        locals_['writer'].add_summary(summary, self_.num_timesteps)
+        return True
+
+
+    model.learn(50000, callback=callback)
+
 
 Legacy Integration
 -------------------
 
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index 1453f6418f..da5fa68242 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -6,10 +6,10 @@ Changelog
 For download links, please look at `Github release page `_.
 
-Pre-Release 2.7.0a0 (WIP)
+Release 2.7.0 (2019-07-31)
 --------------------------
 
-**Twin Delayed DDPG (TD3)**
+**Twin Delayed DDPG (TD3) and GAE bug fix (TRPO, PPO1, GAIL)**
 
 Breaking Changes:
 ^^^^^^^^^^^^^^^^^
@@ -17,18 +17,17 @@ Breaking Changes:
 
 New Features:
 ^^^^^^^^^^^^^
 - added Twin Delayed DDPG (TD3) algorithm, with HER support
-
-- Add support for continuous action spaces to `action_probability`, computing the PDF of a Gaussian
+- added support for continuous action spaces to `action_probability`, computing the PDF of a Gaussian
   policy in addition to the existing support for categorical stochastic policies.
-- Add flag to `action_probability` to return log-probabilities.
-- Added support for python lists and numpy arrays in ``logger.writekvs``. (@dwiel)
-- The info dicts returned by VecEnvs now include a ``terminal_observation`` key providing access to the last observation in a trajectory. (@qxcv)
+- added flag to `action_probability` to return log-probabilities.
+- added support for python lists and numpy arrays in ``logger.writekvs``. (@dwiel)
+- the info dicts returned by VecEnvs now include a ``terminal_observation`` key providing access to the last observation in a trajectory. (@qxcv)
 
 Bug Fixes:
 ^^^^^^^^^^
 - fixed a bug in ``traj_segment_generator`` where the ``episode_starts`` was wrongly recorded, resulting in wrong calculation of Generalized Advantage Estimation (GAE), this affects TRPO, PPO1 and GAIL (thanks to @miguelrass for spotting the bug)
-- add missing property `n_batch` in `BasePolicy`.
+- added missing property `n_batch` in `BasePolicy`.
 
 Deprecations:
 ^^^^^^^^^^^^^
@@ -38,12 +37,13 @@ Others:
 - renamed some keys in ``traj_segment_generator`` to be more meaningful
 - retrieve unnormalized reward when using Monitor wrapper with TRPO, PPO1 and GAIL to display them in the logs (mean episode reward)
-- Clean up DDPG code (renamed variables)
+- cleaned up DDPG code (renamed variables)
 
 Documentation:
 ^^^^^^^^^^^^^^
 - doc fix for the hyperparameter tuning command in the rl zoo
+- added an example on how to log an additional variable with TensorBoard and a callback
diff --git a/setup.py b/setup.py
index 336c4f2f8f..56387e8f3d 100644
--- a/setup.py
+++ b/setup.py
@@ -118,7 +118,7 @@
     ] + tf_dependency,
     extras_require={
         'tests': [
-            'pytest==3.5.1',
+            'pytest',
             'pytest-cov',
             'pytest-env',
             'pytest-xdist',
@@ -138,7 +138,7 @@
     license="MIT",
     long_description=long_description,
     long_description_content_type='text/markdown',
-    version="2.7.0a0",
+    version="2.7.0",
 )
 
 # python setup.py sdist
diff --git a/stable_baselines/__init__.py b/stable_baselines/__init__.py
index 93d1b0e193..35f62999ae 100644
--- a/stable_baselines/__init__.py
+++ b/stable_baselines/__init__.py
@@ -11,4 +11,4 @@
 from stable_baselines.trpo_mpi import TRPO
 from stable_baselines.sac import SAC
 
-__version__ = "2.6.1a0"
+__version__ = "2.7.0"
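
The note added to ``docs/guide/algos.rst`` above points out that episode statistics such as `ep_rewmean` and `eplenmean` are only available when the environment is wrapped in a Monitor. A minimal sketch of such a setup is shown below; it is only an illustration (the algorithm, environment id and timestep budget are arbitrary choices, not taken from this patch, and the exact names of the logged keys vary between algorithms):

.. code-block:: python

    import gym

    from stable_baselines import PPO2
    from stable_baselines.bench import Monitor
    from stable_baselines.common.vec_env import DummyVecEnv


    def make_env():
        # Monitor records episode rewards and lengths, which is what the
        # episode statistics shown in the training logs are computed from
        return Monitor(gym.make("CartPole-v1"), filename=None, allow_early_resets=True)


    env = DummyVecEnv([make_env])
    model = PPO2("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=10000)

Without the Monitor wrapper, training still runs, but those episode statistics are typically missing or reported as NaN, which is the behaviour discussed in Issue #339.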