Merge pull request #38 from GaspardQin/master
OmniRobot environment
araffin authored May 21, 2019
2 parents aad209d + c0da4e6 commit 1ab1bd3
Showing 43 changed files with 2,963 additions and 75 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -11,7 +11,7 @@ logs_real/
.pytest_cache/
docker/environment.yml
_build/

.vscode/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
9 changes: 6 additions & 3 deletions README.md
@@ -110,9 +110,9 @@ All the environments we propose follow the OpenAI Gym interface. We also extende

### Available Environments

| **Kuka environment** | **Mobile Robot environment** | **Racing car environment** |
| -------------------------- | ---------------------------------- | -------------------------------- |
| <img src="imgs/kuka.gif"/> | <img src="imgs/mobile_robot.gif"/> | <img src="imgs/racing_car.gif"/> |
| **Kuka environment** | **Mobile Robot environment** | **Racing car environment** | **Omnidirectional robot environment** |
| -------------------------- | ---------------------------------- | -------------------------------- | ------------------------------------------- |
| <img src="imgs/kuka.gif"/> | <img src="imgs/mobile_robot.gif"/> | <img src="imgs/racing_car.gif"/> | <img src="imgs/omnirobot.gif"/> |


| **Name** | **Action space (discrete)** | **Action space (continuous)** | **Rewards** | **ground truth** |
@@ -126,6 +126,7 @@ All the environments we propose follow the OpenAI Gym interface. We also extende
| **MobileRobot**<br>**1D** | 2 actions (1D cardinal direction) | 1 axis (1D cardinal direction) | 1 when target reached, -1 for a wall hit, otherwise 0 <sup>(2)</sup> | the X position of the robot <sup>(4)</sup> |
| **MobileRobot**<br>**LineTarget** | 4 actions (2D cardinal direction) | 2 axis (2D cardinal direction) | 1 when target reached, -1 for a wall hit, otherwise 0 <sup>(2)</sup> | the X,Y position of the robot <sup>(4)</sup> |
| **CarRacing** | 4 actions (left, right, accelerate, brake) | 3 axis (steering, accelerate, brake) | -100 when out of bounds, otherwise -0.1 | the X,Y position of the car <sup>(4)</sup> |
| **OmniRobot** | 4 actions (2D cardinal direction) | 2 axis (2D cardinal direction) | 1 when target reached, -1 for a wall hit, otherwise 0 <sup>(2)</sup> | the X,Y position of the robot <sup>(4)</sup> |

<sub><sup>1. The action space can use 6-axis arm joint control with the `--joints` flag</sup></sub><br>
<sub><sup>2. The reward can be the euclidean distance to the target with the `--shape-reward` flag</sup></sub><br>
@@ -150,6 +151,8 @@ the available environments are:
- Baxter-v0: A bridge to use a baxter robot with ROS (in simulation, it uses Gazebo)
- Robobo: A Robobo robot that must reach a target position.
- RoboboGymEnv-v0: A bridge to use a Robobo robot with ROS.
- OmniRobot: An omnidirectional robot on a 2D terrain that must reach a target position (see [Working With Real Robots: OmniRobot](https://github.com/GaspardQin/robotics-rl-srl/tree/master/real_robots))
- OmnirobotEnv-v0: Simulator but also a bridge to use an OmniRobot with ROS.

Please read the [documentation](https://s-rl-toolbox.readthedocs.io/) for more details (e.g. adding a custom environment).

5 changes: 5 additions & 0 deletions config/srl_models.yaml
@@ -135,3 +135,8 @@ CarRacingGymEnv-v0:
autoencoder_inverse: 18-07-20_12h13_18_custom_cnn_ST_DIM200_autoencoder_inverse/srl_model.pth
autoencoder_reward: 18-07-20_14h35_43_custom_cnn_ST_DIM200_autoencoder_reward/srl_model.pth
srl_combination: 18-07-19_18h16_03_custom_cnn_ST_DIM200_reward_autoencoder_inverse/srl_model.pth

OmnirobotEnv-v0:
# Base path to SRL log folder
log_folder: srl_zoo/logs/omnirobot_simulator/
autoencoder: 19-02-04_23h27_22_custom_cnn_ST_DIM200_autoencoder_reward_inverse_forward/srl_model.pth
8 changes: 8 additions & 0 deletions docs/changelog.rst
@@ -5,6 +5,14 @@ Changelog

For download links, please look at `Github release page <https://github.com/araffin/robotics-rl-srl/releases>`_.

Release 1.3.0 (2019-??-??)
--------------------------

- added OmniRobot environment for simulation and real-life settings
- updated package version (gym, stable-baselines)
- updated doc and tests
- added script for merging datasets

Release 1.2.0 (2019-01-17)
--------------------------

4 changes: 4 additions & 0 deletions docs/guide/envs.rst
@@ -57,6 +57,10 @@ You can find a recap table in the README.

- RoboboGymEnv-v0: A bridge to use a Robobo robot with ROS.

- Omnidirectional Robot: A mobile robot on a 2D terrain that must reach a target position.

- OmnirobotEnv-v0: A bridge to use a real Omnirobot with ROS (also available in simulation).


Generating Data
---------------
156 changes: 152 additions & 4 deletions docs/guide/real_robots.rst
@@ -2,8 +2,8 @@

.. _working-with-real-robots:-baxter-and-robobo:

Working With Real Robots: Baxter and Robobo
===========================================
Working With Real Robots: Baxter, Robobo and Omnirobot
======================================================

Baxter Robot with Gazebo and ROS
--------------------------------
@@ -257,10 +257,158 @@ RL on a Real Robobo

::

python -m visdom.server

4. Train the agent (python 3)

::

python -m rl_baselines.train --srl-model ground_truth --log-dir logs_real/ --num-stack 1 --algo ppo2 --env RoboboGymEnv-v0


Working With a Real Omnirobot
-----------------------------

By default, Omnirobot uses the same reward and termination policy as the MobileRobot environment.
Thus each episode lasts exactly 251 steps; the robot gets ``reward=1`` when it touches the target,
``reward=-1`` when it touches the border, and ``reward=0`` otherwise.

All the important parameters are defined in ``constants.py``,
so you can easily modify the reward or termination policy of this environment.
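
A minimal sketch of this default reward/termination logic (the boolean inputs are illustrative, not the toolbox's actual API):

::

    MAX_STEPS = 251  # each episode lasts exactly this many steps

    def compute_reward(hit_target, hit_border):
        if hit_target:
            return 1
        if hit_border:
            return -1
        return 0

    def is_done(step_count):
        # Fixed-length episodes: reaching the target does not end them early
        return step_count >= MAX_STEPS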

Architecture of Omnirobot
~~~~~~~~~~~~~~~~~~~~~~~~~

(Figure: overall architecture diagram of the Omnirobot environment)

Architecture of Real Omnirobot
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The Omnirobot environment consists of two principal components (two threads):

- ``real_robots/omnirobot_server.py`` (python2, using ROS to communicate with the robot)
- ``environments/omnirobot_gym/omnirobot_env.py`` (python3, wrapped baseline environment)

These two components communicate through a ZMQ socket; the port can be changed and is 7777 by default.
Both components must be launched manually, because they rely on different environments (ROS and Anaconda).
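
A minimal sketch of this request/reply pattern (assuming ``pyzmq``; the message fields are illustrative, not the exact protocol):

::

    # omnirobot_server side: reply to commands coming from omnirobot_env
    import zmq

    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind("tcp://*:7777")  # default port mentioned above

    while True:
        msg = socket.recv_json()  # e.g. {"command": "action", "action": 0}
        # ... execute the command on the robot ...
        socket.send_json({"position": [0.0, 0.0], "reward": 0})  # illustrative reply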

Architecture of Omnirobot Simulator
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The simulator runs in a single thread, ``omnirobot_env``. The simulator is an object living in this thread
and it exposes exactly the same API as the ``zmq`` socket,
so ``omnirobot_server`` can easily be swapped for ``omnirobot_simulator_server`` without
changing the code of ``omnirobot_env``.
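
A sketch of this drop-in idea (the class and method bodies here are illustrative, not the actual implementation):

::

    class OmniRobotSimulatorSocket(object):
        """Simulated server exposing the same send/recv API as a zmq socket."""

        def __init__(self):
            self._reply = None

        def send_json(self, msg):
            # Process the command immediately instead of sending it over TCP
            self._reply = {"position": [0.0, 0.0], "reward": 0}  # placeholder

        def recv_json(self):
            # Return the reply that the real server would have sent
            return self._reply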

Switch between real robot and simulator
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Switch from the real robot to the simulator:
  modify ``real_robots/constants.py``, setting ``USING_OMNIROBOT = False`` and ``USING_OMNIROBOT_SIMULATOR = True``
- Switch from the simulator to the real robot:
  modify ``real_robots/constants.py``, setting ``USING_OMNIROBOT = True`` and ``USING_OMNIROBOT_SIMULATOR = False``
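
For example, the relevant lines in ``real_robots/constants.py`` look like this when driving the real robot (illustrative excerpt; only these two flags are confirmed above):

::

    # real_robots/constants.py (excerpt)
    USING_OMNIROBOT = True               # drive the real robot
    USING_OMNIROBOT_SIMULATOR = False    # set to True (and the flag above to False) for the simulator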

Real Omnirobot
~~~~~~~~~~~~~~
Omnirobot offers a clean environment for RL: at each RL step,
the real robot performs closed-loop positional control to reach the commanded position.

While the robot is moving, ``omnirobot_server`` blocks until it receives a message on the ``finished`` topic,
which is sent by the robot.
This blocking call has a timeout (30s by default), so if anything unexpected happens,
``omnirobot_server`` will fail and exit.
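
A sketch of this blocking wait (python2/rospy; assuming the ``finished`` topic carries ``std_msgs/Empty``, the actual message type may differ):

::

    import rospy
    from std_msgs.msg import Empty

    STEP_TIMEOUT = 30.0  # seconds, the default mentioned above

    try:
        # Block until the robot reports that the positional control is done
        rospy.wait_for_message('finished', Empty, timeout=STEP_TIMEOUT)
    except rospy.ROSException:
        # Timeout: something unexpected happened, so fail and close
        rospy.logerr("no 'finished' message within %.0fs", STEP_TIMEOUT)
        raise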

Launch RL on real omnirobot
~~~~~~~~~~~~~~~~~~~~~~~~~~~
To run an RL experiment on the Omnirobot, follow these steps:

- switch to the real robot (modify ``constants.py``, ensuring ``USING_OMNIROBOT = True``)
- set up the ROS environment and comment out ``anaconda`` in ``~/.bashrc``, launch a new terminal, and run

::

python -m real_robots.omnirobot_server

- comment out the ROS environment and uncomment ``anaconda`` in ``~/.bashrc``, then launch a new terminal.

- To train RL on the real robot, run (other options can be customized):

::

python -m rl_baselines.train --env OmnirobotEnv-v0

- To replay an RL policy on the real robot (the policy may have been trained in the simulator), run:

::

python -m replay.enjoy_baselines --log-dir path/to/RL/logs --render

Recording Data of real omnirobot
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To record a dataset with the real robot, follow these steps:

- switch to the real robot (modify ``constants.py``, ensuring ``USING_OMNIROBOT = True``)
- set up the ROS environment and comment out ``anaconda`` in ``~/.bashrc``, launch a new terminal, and run:

::

python -m real_robots.omnirobot_server

- change ``episodes`` to the number of episodes you want in ``environments/omnirobot_gym/test_env.py``
- comment out the ROS environment and uncomment ``anaconda`` in ``~/.bashrc``, launch a new terminal, and run:

::

python -m environments.omnirobot_gym.test_env

Note that you should move the target manually between episodes.
You can use a random agent, an agent that always moves toward the target
(this increases the proportion of positive rewards in the dataset),
or combine the two by setting a proportion (``TORWARD_TARGET_PROPORTION``).
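
A sketch of such a mixed policy (the action encoding, helper names and the value of the proportion are assumptions):

::

    import numpy as np

    TORWARD_TARGET_PROPORTION = 0.3  # assumed value

    def choose_action(robot_pos, target_pos, n_actions=4):
        if np.random.rand() < TORWARD_TARGET_PROPORTION:
            # Move along the axis with the largest distance to the target
            delta = target_pos - robot_pos
            axis = int(np.argmax(np.abs(delta)))
            return 2 * axis + (0 if delta[axis] > 0 else 1)  # hypothetical encoding
        # Otherwise, act randomly
        return np.random.randint(n_actions)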


Omnirobot Simulator
~~~~~~~~~~~~~~~~~~~
This simulator uses image-composition tricks to render realistic images of the environment. It needs several images as input:

- a background image (480x480, undistorted)
- the robot's tag/code, cropped from a real environment image (480x480, undistorted), with a margin of 3 or 4 pixels
- the target's tag/code, cropped from a real environment image (480x480, undistorted), with a margin of 3 or 4 pixels

It also needs some important information:

- the margin of the markers
- the path to the camera info file, generated by ROS's camera_calibration package

The camera matrix should correspond to the original image size (e.g. 640x480 in our case).

Details about the inputs above can be found in the comments of ``OmniRobotEnvRender``.
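
A sketch of loading the camera matrix from such a calibration file (field names follow the standard ROS camera info YAML format; the path is an example):

::

    import numpy as np
    import yaml

    with open("path/to/camera_info.yaml") as f:  # hypothetical path
        calib = yaml.safe_load(f)

    camera_matrix = np.array(calib["camera_matrix"]["data"]).reshape(3, 3)
    # The matrix must match the original image size (e.g. 640x480)
    assert calib["image_width"] == 640 and calib["image_height"] == 480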

Noise of Omnirobot Simulator
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To make the simulator more general, and to make RL/SRL more stable, several types of noise are added to it.
The parameters of these noises can be modified at the top of ``omnirobot_simulator_server.py``
(a sketch of how such noise can be applied follows the list below).

- Noise on the robot's position and yaw:
  Gaussian noise, controlled by ``NOISE_VAR_ROBOT_POS`` and ``NOISE_VAR_ROBOT_YAW``.
- Pixel-wise noise on the markers:
  Gaussian noise that simulates camera noise, applied pixel-wise to the marker images,
  controlled by ``NOISE_VAR_TARGET_PIXEL`` and ``NOISE_VAR_ROBOT_PIXEL``.
- Noise on the environment's luminosity:
  Gaussian noise applied in the LAB space of the output image, to simulate luminosity changes,
  controlled by ``NOISE_VAR_ENVIRONMENT``.
- Noise on the markers' size:
  the robot's and target's markers are scaled proportionally, to simulate position variance along the vertical axis,
  controlled by ``NOISE_VAR_ROBOT_SIZE_PROPOTION`` and ``NOISE_VAR_TARGET_SIZE_PROPOTION``.
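
A sketch of the first two noise types (the variance values here are assumptions; the real ones are defined at the top of ``omnirobot_simulator_server.py``):

::

    import numpy as np

    NOISE_VAR_ROBOT_POS = 0.01    # assumed value
    NOISE_VAR_ROBOT_YAW = 0.05    # assumed value
    NOISE_VAR_ROBOT_PIXEL = 10.0  # assumed value

    def noisy_pose(pos, yaw):
        # Gaussian noise on the robot's (x, y) position and yaw
        pos = pos + np.random.randn(2) * NOISE_VAR_ROBOT_POS
        yaw = yaw + np.random.randn() * NOISE_VAR_ROBOT_YAW
        return pos, yaw

    def noisy_marker(marker_img):
        # Pixel-wise Gaussian noise simulating camera noise on a marker crop
        noise = np.random.randn(*marker_img.shape) * NOISE_VAR_ROBOT_PIXEL
        return np.clip(marker_img.astype(np.float64) + noise, 0, 255).astype(np.uint8)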

Known issues of Omnirobot
~~~~~~~~~~~~~~~~~~~~~~~~~
- The script ``omnirobot_server.py`` in robotics-rl-srl cannot simply be stopped with ctrl-c.

  - This is because zmq in python2 blocks in a way that prevents even SIGINT from being detected.
  - To quit the program, send it ``SIGKILL``, e.g. with ``kill -9`` or ``htop``.

- Error: ``ImportError: /opt/ros/kinetic/lib/python2.7/dist-packages/cv2.so: undefined symbol: PyCObject_Type``

  - You are probably running a program that expects the ``conda`` environment; sometimes, even after changing ``~/.bashrc`` and correctly running ``source ~/.bashrc``, the active environment is still the ``ros`` one.
  - In that situation, re-check the contents of ``~/.bashrc`` and open another new terminal to launch the program.

- Stuck at ``wait for client to connect`` or ``waiting to connect server``: there are several possible reasons.

  - The client and server ports are not the same; make sure they match.
  - The port is occupied by another client/server that you should kill. If you cannot find the process occupying the port, use ``fuser 7777/tcp -k`` to kill it directly (7777 can be replaced by any port number).


6 changes: 2 additions & 4 deletions docs/index.rst
@@ -40,10 +40,8 @@ Main Features

Related papers:

- "Decoupling feature extraction from policy learning: assessing benefits of state representation learning in goal based robotics" (Raffin et al. 2018) https://openreview.net/forum?id=Hkl-di09FQ
- "S-RL Toolbox: Environments, Datasets and Evaluation Metrics for
State Representation Learning" (Raffin et al., 2018)
`https://arxiv.org/abs/1809.09369 <https://arxiv.org/abs/1809.09369>`__
- "Decoupling feature extraction from policy learning: assessing benefits of state representation learning in goal based robotics" (Raffin et al. 2018) `https://arxiv.org/abs/1901.08651 <https://arxiv.org/abs/1901.08651>`__
- "S-RL Toolbox: Environments, Datasets and Evaluation Metrics for State Representation Learning" (Raffin et al., 2018) `https://arxiv.org/abs/1809.09369 <https://arxiv.org/abs/1809.09369>`__

.. note::
This documentation only gives an overview of the RL Toolbox,
5 changes: 3 additions & 2 deletions environment.yml
@@ -98,6 +98,7 @@ dependencies:
- enum34==1.1.6
- future==0.16.0
- futures==3.1.1
- gym==0.11.0
- git+https://github.com/hyperopt/hyperopt.git
- idna==2.6
- joblib==0.11
@@ -119,8 +120,8 @@
- requests==2.18.4
- seaborn==0.8.1
- scikit-learn==0.19.1
- scipy==1.0.0
- stable-baselines==2.4.0
- scipy==1.2.0
- stable-baselines==2.5.0
- termcolor==1.1.0
- torchfile==0.1.0
- tornado==4.5.3
42 changes: 42 additions & 0 deletions environments/change_to_relative_pos.py
@@ -0,0 +1,42 @@
import argparse
from os.path import join
import shutil

import numpy as np


def main():
    parser = argparse.ArgumentParser(
        description='Convert an existing dataset whose ground truth is the global position into relative position')
    parser.add_argument('--data-src', type=str, default=None, help='source data folder (global position)')
    parser.add_argument('--data-dst', type=str, default=None, help='destination data folder (relative position)')

    args = parser.parse_args()
    assert args.data_src is not None
    assert args.data_dst is not None
    ground_truth = np.load(join(args.data_src, 'ground_truth.npz'))
    preprocessed_data = np.load(join(args.data_src, 'preprocessed_data.npz'))

    # Copy the whole dataset, then overwrite ground_truth.npz in the destination
    shutil.copytree(args.data_src, args.data_dst)
    episode_starts = preprocessed_data['episode_starts']
    print(ground_truth.keys())
    ground_truth_states = ground_truth['ground_truth_states']
    target_position = ground_truth['target_positions']

    episode_num = -1

    print(ground_truth_states.shape)
    for i in range(ground_truth_states.shape[0]):
        # np.bool_ is not the Python singleton True, so test truthiness directly
        if episode_starts[i]:
            episode_num += 1
        # Express each state relative to the current episode's target position
        ground_truth_states[i, :] = ground_truth_states[i, :] - target_position[episode_num]
    new_ground_truth = {}
    for key in ground_truth.keys():
        if key != 'ground_truth_states':
            new_ground_truth[key] = ground_truth[key]
    new_ground_truth['ground_truth_states'] = ground_truth_states
    np.savez(join(args.data_dst, 'ground_truth.npz'), **new_ground_truth)


if __name__ == '__main__':
    main()
