diff --git a/.gitignore b/.gitignore index 33332a7a9..bff2842ec 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,7 @@ logs_real/ .pytest_cache/ docker/environment.yml _build/ - +.vscode/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index e845b110d..48fcc90be 100644 --- a/README.md +++ b/README.md @@ -110,9 +110,9 @@ All the environments we propose follow the OpenAI Gym interface. We also extende ### Available Environments -| **Kuka environment** | **Mobile Robot environment** | **Racing car environment** | -| -------------------------- | ---------------------------------- | -------------------------------- | -| | | | +| **Kuka environment** | **Mobile Robot environment** | **Racing car environment** | **Omnidirectional robot environment** | +| -------------------------- | ---------------------------------- | -------------------------------- | ------------------------------------------- | +| | | | | | **Name** | **Action space (discrete)** | **Action space (continuous)** | **Rewards** | **ground truth** | @@ -126,6 +126,7 @@ All the environments we propose follow the OpenAI Gym interface. We also extende | **MobileRobot**
**1D** | 2 actions (1D cardinal direction) | 1 axis (1D cardinal direction) | 1 when target reached, -1 for a wall hit, otherwise 0 (2) | the X position of the robot (4) |
| **MobileRobot**<br>**LineTarget** | 4 actions (2D cardinal direction) | 2 axis (2D cardinal direction) | 1 when target reached, -1 for a wall hit, otherwise 0 (2) | the X,Y position of the robot (4) |
| **CarRacing** | 4 actions (left, right, accelerate, brake) | 3 axis (steering, accelerate, brake) | -100 when out of bounds, otherwise -0.1 | the X,Y position of the car (4) |
+| **OmniRobot** | 4 actions (2D cardinal direction) | 2 axis (2D cardinal direction) | 1 when target reached, -1 for a wall hit, otherwise 0 (2) | the X,Y position of the robot (4) |

1. The action space can use 6 axis arm joints control with the `--joints` flag<br>
2. The reward can be the euclidean distance to the target with the `--shape-reward` flag<br>
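All these environments follow the OpenAI Gym interface noted above, so the new OmniRobot entry can be exercised like the others. A minimal sketch, not part of this patch, assuming `USING_OMNIROBOT_SIMULATOR = True` is selected in `real_robots/constants.py`; the registry lookup mirrors what `environments/dataset_generator.py` does:

```python
# Illustrative only: instantiate OmnirobotEnv-v0 through the toolbox registry
# and run a few random steps in the simulator.
from environments.registry import registered_env

env_class = registered_env["OmnirobotEnv-v0"][0]   # first element is the env class
env = env_class(renders=False, is_discrete=True, record_data=False)

obs = env.reset()
for _ in range(20):
    obs, reward, done, _ = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.closeServerConnection()   # cleanly close the (simulated) server socket
```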
@@ -150,6 +151,8 @@ the available environments are: - Baxter-v0: A bridge to use a baxter robot with ROS (in simulation, it uses Gazebo) - Robobo: A Robobo robot that must reach a target position. - RoboboGymEnv-v0: A bridge to use a Robobo robot with ROS. +- OmniRobot: An Omnidirectional robot on a 2d terrain that must reach a target position (see [Working With Real Robots: OmniRobot](https://github.com/GaspardQin/robotics-rl-srl/tree/master/real_robots)) + - OmnirobotEnv-v0: Simulator but also a bridge to use an OmniRobot with ROS. Please read the [documentation](https://s-rl-toolbox.readthedocs.io/) for more details (e.g. adding a custom environment). diff --git a/config/srl_models.yaml b/config/srl_models.yaml index bf141dc0f..ea011f4a7 100644 --- a/config/srl_models.yaml +++ b/config/srl_models.yaml @@ -135,3 +135,8 @@ CarRacingGymEnv-v0: autoencoder_inverse: 18-07-20_12h13_18_custom_cnn_ST_DIM200_autoencoder_inverse/srl_model.pth autoencoder_reward: 18-07-20_14h35_43_custom_cnn_ST_DIM200_autoencoder_reward/srl_model.pth srl_combination: 18-07-19_18h16_03_custom_cnn_ST_DIM200_reward_autoencoder_inverse/srl_model.pth + +OmnirobotEnv-v0: + # Base path to SRL log folder + log_folder: srl_zoo/logs/omnirobot_simulator/ + autoencoder: 19-02-04_23h27_22_custom_cnn_ST_DIM200_autoencoder_reward_inverse_forward/srl_model.pth \ No newline at end of file diff --git a/docs/changelog.rst b/docs/changelog.rst index 66ca89a1a..2a5f1f899 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -5,6 +5,14 @@ Changelog For download links, please look at `Github release page `_. +Release 1.3.0 (2019-??-??) +-------------------------- + +- added OmniRobot environment for simulation and real life setting +- updated package version (gym, stable-baselines) +- updated doc and tests +- added script for merging datasets + Release 1.2.0 (2019-01-17) -------------------------- diff --git a/docs/guide/envs.rst b/docs/guide/envs.rst index 45512d915..928287270 100644 --- a/docs/guide/envs.rst +++ b/docs/guide/envs.rst @@ -57,6 +57,10 @@ You can find a recap table in the README. - RoboboGymEnv-v0: A bridge to use a Robobo robot with ROS. +- Omnidirectional Robot: A mobile robot on a 2d terrain that must reach a target position. + + - OmnirobotEnv-v0: A bridge to use a real Omnirobot with ROS (also available in simulation). + Generating Data --------------- diff --git a/docs/guide/real_robots.rst b/docs/guide/real_robots.rst index f4d092e35..2a1f20618 100644 --- a/docs/guide/real_robots.rst +++ b/docs/guide/real_robots.rst @@ -2,8 +2,8 @@ .. _working-with-real-robots:-baxter-and-robobo: -Working With Real Robots: Baxter and Robobo -=========================================== +Working With Real Robots: Baxter, Robobo and Omnirobot +====================================================== Baxter Robot with Gazebo and ROS -------------------------------- @@ -257,10 +257,158 @@ RL on a Real Robobo :: - python -m visdom.server + python -m visdom.server 4. Train the agent (python 3) :: - python -m rl_baselines.train --srl-model ground_truth --log-dir logs_real/ --num-stack 1 --algo ppo2 --env RoboboGymEnv-v0 + python -m rl_baselines.train --srl-model ground_truth --log-dir logs_real/ --num-stack 1 --algo ppo2 --env RoboboGymEnv-v0 + + +Working With a Real Omnirobot +----------------------------- + +By default, Omnirobot uses the same reward and terminal policy with the MobileRobot environment. 
+
+Thus each episode will have exactly 251 steps; when the robot touches the target
+it gets ``reward=1``, when it touches the border it gets ``reward=-1``, otherwise ``reward=0``.
+
+All the important parameters are defined in ``real_robots/constants.py``,
+so you can simply modify the reward or terminal policy of this environment there.
+
+Architecture of Omnirobot
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+
+Architecture of Real Omnirobot
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The omnirobot environment consists of two principal components (two threads):
+
+- ``real_robots/omnirobot_server.py`` (python2, using ROS to communicate with the robot)
+- ``environments/omnirobot_gym/omnirobot_env.py`` (python3, the wrapped baseline environment)
+
+These two components communicate over a zmq socket. The socket port can be changed; by default it is 7777.
+They have to be launched manually, because they run in different environments (ROS and anaconda).
+
+Architecture of Omnirobot Simulator
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The simulator runs in a single thread, omnirobot_env. The simulator is an object living in this thread
+that exposes exactly the same API as the ``zmq`` socket,
+so ``omnirobot_server`` can easily be swapped for ``omnirobot_simulator_server`` without
+changing the code of ``omnirobot_env``.
+
+Switch between real robot and simulator
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- Switch from real robot to simulator:
+  modify ``real_robots/constants.py``, set ``USING_OMNIROBOT = False`` and ``USING_OMNIROBOT_SIMULATOR = True``
+- Switch from simulator to real robot:
+  modify ``real_robots/constants.py``, set ``USING_OMNIROBOT = True`` and ``USING_OMNIROBOT_SIMULATOR = False``
+
+Real Omnirobot
+~~~~~~~~~~~~~~
+Omnirobot offers a clean environment for RL: for each RL step,
+the real robot performs a closed-loop position control to reach the commanded position.
+
+While the robot is moving, ``omnirobot_server`` is blocked until it receives a message on the topic ``finished``,
+which is sent by the robot.
+This blocking has a timeout (by default 30s), so if anything unexpected happens,
+``omnirobot_server`` will fail and close.
+
+Launch RL on real omnirobot
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+To launch an RL experiment on the omnirobot, follow these steps:
+
+- switch to the real robot (modify ``real_robots/constants.py``, ensure ``USING_OMNIROBOT = True``)
+- set up the ROS environment and comment out ``anaconda`` in ``~/.bashrc``, launch a new terminal and run
+
+::
+
+  python -m real_robots.omnirobot_server
+
+- comment out the ROS environment and uncomment ``anaconda`` in ``~/.bashrc`` (see the sketch below), then launch a new terminal.
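+
+  A possible ``~/.bashrc`` layout for this switch (the exact paths are illustrative, adapt them to your installation)::
+
+      # source /opt/ros/kinetic/setup.bash       # keep active only in the ROS (python2) terminal
+      export PATH="$HOME/anaconda3/bin:$PATH"    # keep active only in the anaconda (python3) terminal
+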
+
+- If you want to train RL on the real robot, run (other options are customizable):
+
+::
+
+  python -m rl_baselines.train --env OmnirobotEnv-v0
+
+- If you want to replay an RL policy on the real robot (it can have been trained in the simulator), run:
+
+::
+
+  python -m replay.enjoy_baselines --log-dir path/to/RL/logs -render
+
+Recording Data of real omnirobot
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+To record a dataset with the real robot, follow these steps:
+
+- switch to the real robot (modify ``real_robots/constants.py``, ensure ``USING_OMNIROBOT = True``)
+- set up the ROS environment and comment out ``anaconda`` in ``~/.bashrc``, launch a new terminal and run:
+
+::
+
+  python -m real_robots.omnirobot_server
+
+- Change ``episodes`` to the number of episodes you want in ``environments/omnirobot_gym/test_env.py``
+- comment out the ROS environment and uncomment ``anaconda`` in ``~/.bashrc``, launch a new terminal and run:
+
+::
+
+  python -m environments.omnirobot_gym.test_env
+
+Note that you should move the target manually between episodes.
+You can use a random agent, an agent that always moves toward the target
+(this increases the proportion of positive rewards in the dataset),
+or combine both by setting a proportion (``TORWARD_TARGET_PROPORTION``).
+
+
+Omnirobot Simulator
+~~~~~~~~~~~~~~~~~~~
+The simulator composites real images to produce realistic observations of the environment. It needs several images as input:
+
+- background image (480x480, undistorted)
+- robot's tag/code, cropped from a real environment image (480x480, undistorted), with a margin of 3 or 4 pixels.
+- target's tag/code, cropped from a real environment image (480x480, undistorted), with a margin of 3 or 4 pixels.
+
+It also needs some important information:
+
+- margin of the markers
+- path to the camera info file, generated by ROS' camera_calibration package.
+
+The camera matrix should correspond to the original image size (e.g. 640x480 in our case).
+
+The details of the inputs above can be found in OmniRobotEnvRender's comments.
+
+Noise of Omnirobot Simulator
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+To make the simulator more general, and to make RL/SRL more stable, several types of noise are added to it.
+The parameters of these noises can be modified at the top of ``omnirobot_simulator_server.py``:
+
+- noise on the robot position and yaw.
+  Gaussian noise, controlled by ``NOISE_VAR_ROBOT_POS`` and ``NOISE_VAR_ROBOT_YAW``.
+- pixel-wise noise on the markers.
+  Gaussian noise applied pixel-wise to the marker images to simulate camera noise,
+  controlled by ``NOISE_VAR_TARGET_PIXEL`` and ``NOISE_VAR_ROBOT_PIXEL``.
+- noise on the environment's luminosity.
+  Gaussian noise applied in the LAB space of the output image, to simulate luminosity changes,
+  controlled by ``NOISE_VAR_ENVIRONMENT``.
+- noise on the marker size.
+  Changes the size of the robot's and target's markers proportionally, to simulate position variance on the vertical axis.
+  This kind of noise is controlled by ``NOISE_VAR_ROBOT_SIZE_PROPOTION`` and ``NOISE_VAR_TARGET_SIZE_PROPOTION``.
+
+Known issues of Omnirobot
+~~~~~~~~~~~~~~~~~~~~~~~~~
+- The script ``omnirobot_server.py`` in robotics-rl-srl cannot simply be quit with ctrl-c.
+    - This is because zmq in python2 blocks in a way that prevents even SIGINT from being detected.
+    - To quit the program, send it ``SIGKILL``. This can be done with ``kill -9`` or ``htop``, for example::
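+
+        pgrep -f omnirobot_server   # find the server's PID (illustrative)
+        kill -9 <PID>
+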
+- Error: ``ImportError: /opt/ros/kinetic/lib/python2.7/dist-packages/cv2.so: undefined symbol: PyCObject_Type`` + - You probably run a program expected to run in ``conda`` environment, sometimes even ``~/.bashrc`` is changed, and correctly applies ``source ~/.bashrc``, the environment still stays with ``ros``. + - In this situation, simply re-check the contents in ``~/.bashrc``, and open another new terminal to launch the programme. +- Stuck at ``wait for client to connect`` or ``waiting to connect server``, there are several possible reasons. + - Port for client and server are not same. Try to use the same one + - Port is occupied by another client/server, you should kill it. If you cannot find the process which occupies this port, use ``fuser 7777\tcp -k`` to kill it directly. (7777 can be changed to any number of port). + + diff --git a/docs/index.rst b/docs/index.rst index 3f2b8fa60..c2df10813 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -40,10 +40,8 @@ Main Features Related papers: -- "Decoupling feature extraction from policy learning: assessing benefits of state representation learning in goal based robotics" (Raffin et al. 2018) https://openreview.net/forum?id=Hkl-di09FQ -- "S-RL Toolbox: Environments, Datasets and Evaluation Metrics for - State Representation Learning" (Raffin et al., 2018) - `https://arxiv.org/abs/1809.09369 `__ +- "Decoupling feature extraction from policy learning: assessing benefits of state representation learning in goal based robotics" (Raffin et al. 2018) `https://arxiv.org/abs/1901.08651 `__ +- "S-RL Toolbox: Environments, Datasets and Evaluation Metrics for State Representation Learning" (Raffin et al., 2018) `https://arxiv.org/abs/1809.09369 `__ .. note:: This documentation only gives an overview of the RL Toolbox, diff --git a/environment.yml b/environment.yml index dba26eadb..2904538ff 100644 --- a/environment.yml +++ b/environment.yml @@ -98,6 +98,7 @@ dependencies: - enum34==1.1.6 - future==0.16.0 - futures==3.1.1 + - gym==0.11.0 - git+https://github.com/hyperopt/hyperopt.git - idna==2.6 - joblib==0.11 @@ -119,8 +120,8 @@ dependencies: - requests==2.18.4 - seaborn==0.8.1 - scikit-learn==0.19.1 - - scipy==1.0.0 - - stable-baselines==2.4.0 + - scipy==1.2.0 + - stable-baselines==2.5.0 - termcolor==1.1.0 - torchfile==0.1.0 - tornado==4.5.3 diff --git a/environments/change_to_relative_pos.py b/environments/change_to_relative_pos.py new file mode 100644 index 000000000..e6313eaf3 --- /dev/null +++ b/environments/change_to_relative_pos.py @@ -0,0 +1,42 @@ +import argparse +from os.path import join +import shutil + +import numpy as np + + +def main(): + parser = argparse.ArgumentParser( + description='Change existed dataset whose ground_truth is global position to relative position') + parser.add_argument('--data-src', type=str, default=None, help='source data folder (global position)') + parser.add_argument('--data-dst', type=str, default=None, help='destination data folder, (relative position)') + + args = parser.parse_args() + assert args.data_src is not None + assert args.data_dst is not None + ground_truth = np.load(join(args.data_src, 'ground_truth.npz')) + preprocessed_data = np.load(join(args.data_src, 'preprocessed_data.npz')) + + shutil.copytree(args.data_src, args.data_dst) + episode_starts = preprocessed_data['episode_starts'] + print(ground_truth.keys()) + ground_truth_states = ground_truth['ground_truth_states'] + target_position = ground_truth['target_positions'] + + episode_num = -1 + + print(ground_truth_states.shape) + for i in 
range(ground_truth_states.shape[0]): + if episode_starts[i] is True: + episode_num += 1 + ground_truth_states[i, :] = ground_truth_states[i, :] - target_position[episode_num] + new_ground_truth = {} + for key in ground_truth.keys(): + if key != 'ground_truth_states': + new_ground_truth[key] = ground_truth[key] + new_ground_truth['ground_truth_states'] = ground_truth_states + np.savez(join(args.data_dst, 'ground_truth.npz'), **new_ground_truth) + + +if __name__ == '__main__': + main() diff --git a/environments/dataset_fusioner.py b/environments/dataset_fusioner.py new file mode 100644 index 000000000..de5decb72 --- /dev/null +++ b/environments/dataset_fusioner.py @@ -0,0 +1,121 @@ +from __future__ import division, absolute_import, print_function + +import glob +import argparse +import os +import shutil + +import numpy as np +from tqdm import tqdm + + +def main(): + parser = argparse.ArgumentParser(description='Dataset Manipulator: useful to merge two datasets by concatenating ' + + 'episodes. PS: Deleting sources after merging into the destination ' + + 'folder.') + group = parser.add_mutually_exclusive_group() + group.add_argument('--merge', type=str, nargs=3, metavar=('source_1', 'source_2', 'destination'), + default=argparse.SUPPRESS, + help='Merge two datasets by appending the episodes, deleting sources right after.') + + args = parser.parse_args() + + if 'merge' in args: + # let make sure everything is in order + assert os.path.exists(args.merge[0]), "Error: dataset '{}' could not be found".format(args.merge[0]) + assert (not os.path.exists(args.merge[2])), \ + "Error: dataset '{}' already exists, cannot rename '{}' to '{}'".format(args.merge[2], args.merge[0], + args.merge[2]) + # create the output + os.mkdir(args.merge[2]) + + # copy files from first source + os.rename(args.merge[0] + "/dataset_config.json", args.merge[2] + "/dataset_config.json") + os.rename(args.merge[0] + "/env_globals.json", args.merge[2] + "/env_globals.json") + + for record in sorted(glob.glob(args.merge[0] + "/record_[0-9]*/*")): + s = args.merge[2] + "/" + record.split("/")[-2] + '/' + record.split("/")[-1] + os.renames(record, s) + + num_episode_dataset_1 = int(record.split("/")[-2][7:]) + 1 + + # copy files from second source + for record in sorted(glob.glob(args.merge[1] + "/record_[0-9]*/*")): + episode = str(num_episode_dataset_1 + int(record.split("/")[-2][7:])) + new_episode = record.split("/")[-2][:-len(episode)] + episode + s = args.merge[2] + "/" + new_episode + '/' + record.split("/")[-1] + os.renames(record, s) + num_episode_dataset_2 = int(record.split("/")[-2][7:]) + 1 + + # load and correct ground_truth + ground_truth = {} + ground_truth_load = np.load(args.merge[0] + "/ground_truth.npz") + ground_truth_load_2 = np.load(args.merge[1] + "/ground_truth.npz") + ground_truth["images_path"] = [] + num_episode_dataset = num_episode_dataset_1 + + index_slash = args.merge[2].find("/") + index_margin_str = len("/record_") + directory_str = args.merge[2][index_slash+1:] + + for idx_, gt_load in enumerate([ground_truth_load, ground_truth_load_2], 1): + for arr in gt_load.files: + if arr == "images_path": + # here, we want to rename just the folder containing the records, hence the black magic + + for i in tqdm(range(len(gt_load["images_path"])), + desc="Update of paths (Folder " + str(1+idx_) + ")"): + # find the "record_" position + path = gt_load["images_path"][i] + end_pos = path.find("/record_") + inter_pos = path.find("/frame") # pos in the complete path. 
+ + if idx_ > 1: + episode = str(num_episode_dataset_1 + int(path[end_pos + index_margin_str: inter_pos])) + episode = episode.zfill(3) + new_record_path = "/record_" + episode + path[inter_pos:] + else: + new_record_path = path[end_pos:] + ground_truth["images_path"].append(directory_str + new_record_path) + else: + # anything that isnt image_path, we dont need to change + gt_arr = gt_load[arr] + + if idx_ > 1: + num_episode_dataset = num_episode_dataset_2 + + # HERE check before overwritting that the target is random !+ + if gt_load[arr].shape[0] < num_episode_dataset: + gt_arr = np.repeat(gt_load[arr], num_episode_dataset, axis=0) + + if idx_ > 1: + ground_truth[arr] = np.concatenate((ground_truth[arr], gt_arr), axis=0) + else: + ground_truth[arr] = gt_arr + + # save the corrected ground_truth + np.savez(args.merge[2] + "/ground_truth.npz", **ground_truth) + + # load and correct the preprocessed data (actions, rewards etc) + preprocessed = {} + preprocessed_load = np.load(args.merge[0] + "/preprocessed_data.npz") + preprocessed_load_2 = np.load(args.merge[1] + "/preprocessed_data.npz") + + for prepro_load in [preprocessed_load, preprocessed_load_2]: + for arr in prepro_load.files: + pr_arr = prepro_load[arr] + preprocessed[arr] = np.concatenate((preprocessed.get(arr, []), pr_arr), axis=0) + if arr == "episode_starts": + preprocessed[arr] = preprocessed[arr].astype(bool) + else: + preprocessed[arr] = preprocessed[arr].astype(int) + + np.savez(args.merge[2] + "/preprocessed_data.npz", ** preprocessed) + + # remove the old folders + shutil.rmtree(args.merge[0]) + shutil.rmtree(args.merge[1]) + + +if __name__ == '__main__': + main() diff --git a/environments/dataset_generator.py b/environments/dataset_generator.py index 3872b2fb4..283cf0438 100644 --- a/environments/dataset_generator.py +++ b/environments/dataset_generator.py @@ -8,13 +8,13 @@ import time import numpy as np -from gym.spaces import prng from stable_baselines import PPO2 from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize from stable_baselines.common.policies import CnnPolicy from environments import ThreadingType from environments.registry import registered_env +from real_robots.constants import USING_OMNIROBOT from srl_zoo.utils import printRed, printYellow os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # used to remove debug info of tensorflow @@ -62,15 +62,16 @@ def env_thread(args, thread_num, partition=True, use_ppo2=False): env_class = registered_env[args.env][0] env = env_class(**env_kwargs) - - # Additional env when using a trained ppo agent to generate data - # instead of a random agent - train_env = env_class(**{**env_kwargs, "record_data": False, "renders": False}) - train_env = DummyVecEnv([lambda: train_env]) - train_env = VecNormalize(train_env, norm_obs=True, norm_reward=False) + using_real_omnibot = args.env == "OmnirobotEnv-v0" and USING_OMNIROBOT model = None if use_ppo2: + # Additional env when using a trained ppo agent to generate data + # instead of a random agent + train_env = env_class(**{**env_kwargs, "record_data": False, "renders": False}) + train_env = DummyVecEnv([lambda: train_env]) + train_env = VecNormalize(train_env, norm_obs=True, norm_reward=False) + model = PPO2(CnnPolicy, train_env).learn(args.ppo2_timesteps) frames = 0 @@ -82,22 +83,34 @@ def env_thread(args, thread_num, partition=True, use_ppo2=False): (thread_num if thread_num <= args.num_episode % args.num_cpu else args.num_episode % args.num_cpu) env.seed(seed) - prng.seed(seed) # this is for the sample() function from 
gym.space + env.action_space.seed(seed) # this is for the sample() function from gym.space obs = env.reset() done = False t = 0 + episode_toward_target_on = False while not done: env.render() if use_ppo2: action, _ = model.predict([obs]) else: - action = [env.action_space.sample()] + # Using a target reaching policy (untrained, from camera) when collecting data from real OmniRobot + if episode_toward_target_on and np.random.rand() < args.toward_target_timesteps_proportion and \ + using_real_omnibot: + action = [env.actionPolicyTowardTarget()] + else: + action = [env.action_space.sample()] + + action_to_step = action[0] + _, _, done, _ = env.step(action_to_step) - _, _, done, _ = env.step(action[0]) frames += 1 t += 1 if done: + if np.random.rand() < args.toward_target_timesteps_proportion and using_real_omnibot: + episode_toward_target_on = True + else: + episode_toward_target_on = False print("Episode finished after {} timesteps".format(t + 1)) if thread_num == 0: @@ -134,6 +147,8 @@ def main(): help='runs a ppo2 agent instead of a random agent') parser.add_argument('--ppo2-timesteps', type=int, default=1000, help='number of timesteps to run PPO2 on before generating the dataset') + parser.add_argument('--toward-target-timesteps-proportion', type=float, default=0.0, + help="propotion of timesteps that use simply towards target policy, should be 0.0 to 1.0") args = parser.parse_args() assert (args.num_cpu > 0), "Error: number of cpu must be positive and non zero" diff --git a/environments/mobile_robot/mobile_robot_env.py b/environments/mobile_robot/mobile_robot_env.py index a0480c7a8..f072070ac 100644 --- a/environments/mobile_robot/mobile_robot_env.py +++ b/environments/mobile_robot/mobile_robot_env.py @@ -248,7 +248,6 @@ def step(self, action): dv += self.np_random.normal(0.0, scale=NOISE_STD) # scale action amplitude between -dv and dv real_action = np.maximum(np.minimum(action, 1), -1) * dv - if self.verbose: print(np.array2string(np.array(real_action), precision=2)) diff --git a/environments/omnirobot_gym/__init__.py b/environments/omnirobot_gym/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/environments/omnirobot_gym/omnirobot_env.py b/environments/omnirobot_gym/omnirobot_env.py new file mode 100644 index 000000000..f5e71f0d4 --- /dev/null +++ b/environments/omnirobot_gym/omnirobot_env.py @@ -0,0 +1,383 @@ +""" +This program allows to run Omnirobot Gym Environment as a module +""" + +import numpy as np +import cv2 +import zmq +from gym import spaces +import torch as th +import matplotlib.pyplot as plt +import seaborn as sns +import yaml +from scipy.spatial.transform import Rotation as R + +from environments.srl_env import SRLGymEnv +from real_robots.constants import * +from real_robots.omnirobot_utils.utils import RingBox, PosTransformer +from state_representation.episode_saver import EpisodeSaver + +if USING_OMNIROBOT_SIMULATOR: + from real_robots.omnirobot_simulator_server import OmniRobotSimulatorSocket + + def recvMatrix(socket): + return socket.recv_image() + +else: + from real_robots.utils import recvMatrix + +RENDER_HEIGHT = 224 +RENDER_WIDTH = 224 +RELATIVE_POS = True +N_CONTACTS_BEFORE_TERMINATION = 3 + +DELTA_POS = 0.1 # DELTA_POS for continuous actions +N_DISCRETE_ACTIONS = 4 + +# Init seaborn +sns.set() + + +def getGlobals(): + """ + :return: (dict) + """ + return globals() + + +def bgr2rgb(bgr_img): + """ + Convert an image from BGR to RGB + :param bgr_img: np.ndarray + :return: np.ndarray + """ + return cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB) 
+ + +class OmniRobotEnv(SRLGymEnv): + """ + OmniRobot robot Environment (Gym wrapper for OmniRobot environment) + The goal of Omnirobot is to go to the location on the table + (signaled with a circle sticker on the table) + :param renders: (bool) Whether to display the GUI or not + :param is_discrete: (bool) true if action space is discrete vs continuous + :param save_path: (str) name of the folder where recorded data will be stored + :param state_dim: (int) When learning states + :param learn_states: (bool) + :param record_data: (bool) Set to true, record frames with the rewards. + :param shape_reward: (bool) Set to true, reward = -distance_to_goal + :param env_rank: (int) the number ID of the environment + :param srl_pipe: (Queue, [Queue]) contains the input and output of the SRL model + """ + + def __init__(self, renders=False, name="Omnirobot", is_discrete=True, save_path='srl_zoo/data/', state_dim=-1, + learn_states=False, srl_model="raw_pixels", record_data=False, action_repeat=1, random_target=True, + shape_reward=False, env_rank=0, srl_pipe=None, **_): + + super(OmniRobotEnv, self).__init__(srl_model=srl_model, + relative_pos=RELATIVE_POS, + env_rank=env_rank, + srl_pipe=srl_pipe) + if action_repeat != 1: + raise NotImplementedError + self.server_port = SERVER_PORT + env_rank + self.n_contacts = 0 + use_ground_truth = srl_model == 'ground_truth' + use_srl = srl_model != 'raw_pixels' + self.use_srl = use_srl or use_ground_truth + self.use_ground_truth = use_ground_truth + self.use_joints = False + self.relative_pos = RELATIVE_POS + self._is_discrete = is_discrete + self.observation = [] + # Start simulation with first observation + self._env_step_counter = 0 + self.episode_terminated = False + self.state_dim = state_dim + + self._renders = renders + self._shape_reward = shape_reward + self.cuda = th.cuda.is_available() + self.target_pos = None + self.saver = None + self._random_target = random_target + + if self._is_discrete: + self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS) + else: + action_dim = 2 + self.action_space = RingBox(positive_low=ACTION_POSITIVE_LOW, positive_high=ACTION_POSITIVE_HIGH, + negative_low=ACTION_NEGATIVE_LOW, negative_high=ACTION_NEGATIVE_HIGH, + shape=np.array([action_dim]), dtype=np.float32) + # SRL model + if self.use_srl: + if use_ground_truth: + self.state_dim = self.getGroundTruthDim() + self.dtype = np.float32 + self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=(self.state_dim,), dtype=self.dtype) + else: + self.dtype = np.uint8 + self.observation_space = spaces.Box(low=0, high=255, shape=(RENDER_WIDTH, RENDER_HEIGHT, 3), + dtype=self.dtype) + + if record_data: + print("Recording data...") + self.saver = EpisodeSaver(name, 0, self.state_dim, globals_=getGlobals(), + relative_pos=RELATIVE_POS, + learn_states=learn_states, path=save_path) + + if USING_OMNIROBOT_SIMULATOR: + self.socket = OmniRobotSimulatorSocket( + output_size=[RENDER_WIDTH, RENDER_HEIGHT], random_target=self._random_target) + else: + # Initialize Baxter effector by connecting to the Gym bridge ROS node: + self.context = zmq.Context() + self.socket = self.context.socket(zmq.PAIR) + self.socket.connect( + "tcp://{}:{}".format(HOSTNAME, self.server_port)) + + # note: if takes too long, run first client, then server + print("Waiting for server connection at port {}...".format( + self.server_port)) + + # hide the output of server + msg = self.socket.recv_json() + print("Connected to server on port {} (received message: {})".format( + self.server_port, msg)) + + 
self.action = [0, 0] + self.reward = 0 + self.robot_pos = np.array([0, 0]) + + # Initialize the state + if self._renders: + self.image_plot = None + + def actionPolicyTowardTarget(self): + """ + :return: (int) action + """ + if abs(self.robot_pos[0] - self.target_pos[0]) > abs(self.robot_pos[1] - self.target_pos[1]): + + if self._is_discrete: + return int(Move.FORWARD) if self.robot_pos[0] < self.target_pos[0] else int(Move.BACKWARD) + # forward # backward + else: + return DELTA_POS if self.robot_pos[0] < self.target_pos[0] else -DELTA_POS + else: + if self._is_discrete: + # left # right + return int(Move.LEFT) if self.robot_pos[1] < self.target_pos[1] else int(Move.RIGHT) + else: + return DELTA_POS if self.robot_pos[1] < self.target_pos[1] else -DELTA_POS + + def step(self, action): + """ + :action: (int) + :return: (tensor (np.ndarray)) observation, int reward, bool done, dict extras) + """ + if not self._is_discrete: + action = np.array(action) + assert self.action_space.contains(action) + + # Convert int action to action in (x,y,z) space + # serialize the action + if isinstance(action, np.ndarray): + self.action = action.tolist() + elif hasattr(action, 'dtype'): # convert numpy type to python type + self.action = action.item() + else: + self.action = action + + self._env_step_counter += 1 + + # Send the action to the server + self.socket.send_json( + {"command": "action", "action": self.action, "is_discrete": self._is_discrete}) + + # Receive state data (position, etc), important to update state related values + self.getEnvState() + + # Receive a camera image from the server + self.observation = self.getObservation() + done = self._hasEpisodeTerminated() + + self.render() + + if self.saver is not None: + self.saver.step(self.observation, action, + self.reward, done, self.getGroundTruth()) + if self.use_srl: + return self.getSRLState(self.observation), self.reward, done, {} + else: + return self.observation, self.reward, done, {} + + def getEnvState(self): + """ + Returns a dictionary containing info about the environment state. + :return: (dict) state_data containing data related to the state: target_pos, + robot_pos and reward. 
+ """ + state_data = self.socket.recv_json() + self.reward = state_data["reward"] + self.target_pos = np.array(state_data["target_pos"]) + self.robot_pos = np.array(state_data["position"]) + + return state_data + + def getObservation(self): + """ + Receive the observation image using a socket + :return: (numpy ndarray) observation + """ + # Receive a camera image from the server + self.observation = recvMatrix(self.socket) + # Resize it: + self.observation = cv2.resize( + self.observation, (RENDER_WIDTH, RENDER_HEIGHT), interpolation=cv2.INTER_AREA) + return self.observation + + def getTargetPos(self): + """ + :return (numpy array): Position of the target (button) + """ + return self.target_pos + + @staticmethod + def getGroundTruthDim(): + """ + :return: (int) + """ + return 2 + + def getGroundTruth(self): + """ + Alias for getRobotPos for compatibility between envs + :return: (numpy array) + """ + return np.array(self.getRobotPos()) + + def getRobotPos(self): + """ + :return: ([float])-> np.ndarray Position (x, y, z) of Baxter left gripper + """ + return self.robot_pos + + def reset(self): + """ + Reset the environment + :return: (numpy ndarray) first observation of the env + """ + self.episode_terminated = False + # Step count since episode start + self._env_step_counter = 0 + # set n contact count + self.n_contacts = 0 + self.socket.send_json({"command": "reset"}) + # Update state related variables, important step to get both data and + # metadata that allow reading the observation image + self.getEnvState() + self.observation = self.getObservation() + if self.saver is not None: + self.saver.reset(self.observation, + self.getTargetPos(), self.getGroundTruth()) + if self.use_srl: + return self.getSRLState(self.observation) + else: + return self.observation + + def _hasEpisodeTerminated(self): + """ + Returns True if the episode is over and False otherwise + """ + if self.episode_terminated or self._env_step_counter > MAX_STEPS: + return True + + if np.abs(self.reward - REWARD_TARGET_REACH) < 0.000001: # reach the target + self.n_contacts += 1 + else: + self.n_contacts = 0 + return False + + def closeServerConnection(self): + """ + To be called at the end of running the program, externally + """ + print("Omnirobot_env client exiting and closing socket...") + self.socket.send_json({"command": "exit"}) + self.socket.close() + + def render(self, mode='rgb_array'): + """ + :param mode: (str) + :return: (numpy array) BGR image + """ + if self._renders: + if mode != "rgb_array": + print('render in human mode not yet supported') + return np.array([]) + + plt.ion() # needed for interactive update + if self.image_plot is None: + plt.figure('Omnirobot RL') + self.initVisualizeBoundary() + self.visualizeBoundary() + self.image_plot = plt.imshow(self.observation_with_boundary, cmap='gray') + self.image_plot.axes.grid(False) + + else: + self.visualizeBoundary() + self.image_plot.set_data(self.observation_with_boundary) + plt.draw() + # Wait a bit, so that plot is visible + plt.pause(0.0001) + return self.observation + + def initVisualizeBoundary(self): + with open(CAMERA_INFO_PATH, 'r') as stream: + try: + contents = yaml.load(stream) + camera_matrix = np.array(contents['camera_matrix']['data']).reshape((3, 3)) + distortion_coefficients = np.array(contents['distortion_coefficients']['data']).reshape((1, 5)) + except yaml.YAMLError as exc: + print(exc) + + # camera installation info + r = R.from_euler('xyz', CAMERA_ROT_EULER_COORD_GROUND, degrees=True) + camera_rot_mat_coord_ground = r.as_dcm() + + 
pos_transformer = PosTransformer(camera_matrix, distortion_coefficients, CAMERA_POS_COORD_GROUND, + camera_rot_mat_coord_ground) + + self.boundary_coner_pixel_pos = np.zeros((2, 4)) + # assume that image is undistorted + self.boundary_coner_pixel_pos[:, 0] = \ + pos_transformer.phyPosGround2PixelPos([MIN_X, MIN_Y], return_distort_image_pos=False).squeeze() + self.boundary_coner_pixel_pos[:, 1] = \ + pos_transformer.phyPosGround2PixelPos([MAX_X, MIN_Y], return_distort_image_pos=False).squeeze() + self.boundary_coner_pixel_pos[:, 2] = \ + pos_transformer.phyPosGround2PixelPos([MAX_X, MAX_Y], return_distort_image_pos=False).squeeze() + self.boundary_coner_pixel_pos[:, 3] = \ + pos_transformer.phyPosGround2PixelPos([MIN_X, MAX_Y], return_distort_image_pos=False).squeeze() + + # transform the corresponding points into cropped image + self.boundary_coner_pixel_pos = self.boundary_coner_pixel_pos - (np.array(ORIGIN_SIZE) - + np.array(CROPPED_SIZE)).reshape(2, 1) / 2.0 + + # transform the corresponding points into resized image (RENDER_WIDHT, RENDER_HEIGHT) + self.boundary_coner_pixel_pos[0, :] *= RENDER_WIDTH/CROPPED_SIZE[0] + self.boundary_coner_pixel_pos[1, :] *= RENDER_HEIGHT/CROPPED_SIZE[1] + + self.boundary_coner_pixel_pos = np.around(self.boundary_coner_pixel_pos).astype(np.int) + + def visualizeBoundary(self): + """ + visualize the unvisible boundary, should call initVisualizeBoundary firstly + """ + self.observation_with_boundary = self.observation.copy() + + for idx in range(4): + idx_next = idx + 1 + cv2.line(self.observation_with_boundary, tuple(self.boundary_coner_pixel_pos[:, idx]), + tuple(self.boundary_coner_pixel_pos[:, idx_next % 4]), (200, 0, 0), 3) diff --git a/environments/omnirobot_gym/test_env.py b/environments/omnirobot_gym/test_env.py new file mode 100644 index 000000000..754a5b377 --- /dev/null +++ b/environments/omnirobot_gym/test_env.py @@ -0,0 +1,37 @@ +import time +import numpy as np +import environments.omnirobot_gym.omnirobot_env as omnirobot_env + + + + + +timestr = time.strftime("%Y%m%d_%H%M%S") +save_path = "srl_zoo/data/" +name = "omnirobot_"+ timestr +env = omnirobot_env.OmniRobotEnv(name=name, renders=False, is_discrete=True, save_path=save_path, record_data=True) +timesteps = 500 # must be greater than MAX_STEPS +episodes = 50 +env.seed(1) +i = 0 +TORWARD_TARGET_PROPORTION = 0.3 +print('Starting episodes..., {}%_action use torward target policy, {}%_action use random policy'.format(TORWARD_TARGET_PROPORTION, 1-TORWARD_TARGET_PROPORTION)) +start_time = time.time() +try: + for _ in range(episodes): + observation = env.reset() + for t in range(timesteps): + if np.random.rand() > TORWARD_TARGET_PROPORTION: + action = env.action_space.sample() + else: + action = env.actionPolicyTowardTarget() + observation, reward, done, info = env.step(action) + env.render() # render() requires first the observation to be obtained + if done: + print("Episode finished after {} timesteps".format(t + 1)) + break + i += 1 +except KeyboardInterrupt: + pass +env.closeServerConnection() +print("Avg. 
frame rate: {:.2f} FPS".format(i / (time.time() - start_time))) diff --git a/environments/registry.py b/environments/registry.py index 8fbc791fb..a5417773b 100644 --- a/environments/registry.py +++ b/environments/registry.py @@ -15,7 +15,7 @@ from environments.mobile_robot.mobile_robot_line_target_env import MobileRobotLineTargetGymEnv from environments.gym_baxter.baxter_env import BaxterEnv from environments.robobo_gym.robobo_env import RoboboEnv - +from environments.omnirobot_gym.omnirobot_env import OmniRobotEnv def register(_id, **kvargs): if _id in registry.env_specs: @@ -48,7 +48,8 @@ def isXAvailable(): "MobileRobot1DGymEnv-v0": (MobileRobot1DGymEnv, MobileRobotGymEnv, PlottingType.PLOT_2D, ThreadingType.PROCESS), "MobileRobotLineTargetGymEnv-v0": (MobileRobotLineTargetGymEnv, MobileRobotGymEnv, PlottingType.PLOT_2D, ThreadingType.PROCESS), "Baxter-v0": (BaxterEnv, SRLGymEnv, PlottingType.PLOT_3D, ThreadingType.NONE), - "RoboboGymEnv-v0": (RoboboEnv, SRLGymEnv, PlottingType.PLOT_2D, ThreadingType.NONE) + "RoboboGymEnv-v0": (RoboboEnv, SRLGymEnv, PlottingType.PLOT_2D, ThreadingType.NONE), + "OmnirobotEnv-v0": (OmniRobotEnv, SRLGymEnv, PlottingType.PLOT_2D, ThreadingType.PROCESS), } # Environments only available when running in a terminal with X (hence only imported when available): diff --git a/imgs/omnirobot.gif b/imgs/omnirobot.gif new file mode 100644 index 000000000..ade4493b7 Binary files /dev/null and b/imgs/omnirobot.gif differ diff --git a/real_robots/README.md b/real_robots/README.md index 563d3f0ea..7ad46991f 100644 --- a/real_robots/README.md +++ b/real_robots/README.md @@ -1,14 +1,26 @@ Table of Contents ================= - * [Baxter Robot with Gazebo and ROS](#baxter-robot-with-gazebo-and-ros) - * [Working With a Real Baxter Robot](#working-with-a-real-baxter-robot) - * [Recording Data With a Random Agent for SRL](#recording-data-with-a-random-agent-for-srl) - * [RL on a Real Baxter Robot](#rl-on-a-real-baxter-robot) - * [Working With a Real Robobo](#working-with-a-real-robobo) - * [Recording Data With a Random Agent for SRL](#recording-data-with-a-random-agent-for-srl-1) - * [RL on a Real Robobo](#rl-on-a-real-robobo) - - +- [Table of Contents](#table-of-contents) + - [Baxter Robot with Gazebo and ROS](#baxter-robot-with-gazebo-and-ros) + - [Working With a Real Baxter Robot](#working-with-a-real-baxter-robot) + - [Recording Data With a Random Agent for SRL](#recording-data-with-a-random-agent-for-srl) + - [RL on a Real Baxter Robot](#rl-on-a-real-baxter-robot) + - [Working With a Real Robobo](#working-with-a-real-robobo) + - [Recording Data With a Random Agent for SRL](#recording-data-with-a-random-agent-for-srl-1) + - [RL on a Real Robobo](#rl-on-a-real-robobo) + - [Working with a Omnirobot](#working-with-a-omnirobot) + - [Architecture of Omnirobot](#architecture-of-omnirobot) + - [Architecture of Real Omnirobot](#architecture-of-real-omnirobot) + - [Architecture of Omnirobot Simulator](#architecture-of-omnirobot-simulator) + - [Switch between real robot and simulator](#switch-between-real-robot-and-simulator) + - [Real Omnirobot](#real-omnirobot) + - [Launch RL on real omnirobot](#launch-rl-on-real-omnirobot) + - [Recording Data of real omnirobot](#recording-data-of-real-omnirobot) + - [Omnirobot Simulator](#omnirobot-simulator) + - [Noise of Omnirobot Simulator](#noise-of-omnirobot-simulator) + - [Train RL on Omnirobot Simulator](#train-rl-on-omnirobot-simulator) + - [Generate Data from Omnirobot Simulator](#generate-data-from-omnirobot-simulator) + - [Known issues 
of Omnirobot](#known-issues-of-omnirobot)

## Baxter Robot with Gazebo and ROS

Gym Wrapper for baxter environment, more details in the dedicated README (environments/gym_baxter/README.md).
@@ -183,3 +195,91 @@ python -m visdom.server
```
python -m rl_baselines.train --srl-model ground_truth --log-dir logs_real/ --num-stack 1 --algo ppo2 --env RoboboGymEnv-v0
```
+
+## Working with a Omnirobot
+By default, Omnirobot uses the same reward and terminal policy as the MobileRobot environment. Thus each episode will have exactly 251 steps; when the robot touches the target it gets `reward=1`, when it touches the border it gets `reward=-1`, otherwise `reward=0`.
+
+All the important parameters are defined in constants.py, so you can simply modify the reward or terminal policy of this environment there.
+
+### Architecture of Omnirobot
+#### Architecture of Real Omnirobot
+The omnirobot environment consists of two principal components (two threads):
+- `real_robots/omnirobot_server.py` (python2, using ROS to communicate with the robot)
+- `environments/omnirobot_gym/omnirobot_env.py` (python3, the wrapped baseline environment)
+These two components communicate over a zmq socket. The socket port can be changed; by default it is 7777.
+They have to be launched manually, because they run in different environments (ROS and anaconda).
+
+#### Architecture of Omnirobot Simulator
+The simulator runs in a single thread, omnirobot_env. The simulator is an object living in this thread that exposes exactly the same API as the `zmq` socket, so `omnirobot_server` can easily be swapped for `omnirobot_simulator_server` without changing the code of `omnirobot_env`.
+
+### Switch between real robot and simulator
+- Switch from real robot to simulator:
+  modify `real_robots/constants.py`, set `USING_OMNIROBOT = False` and `USING_OMNIROBOT_SIMULATOR = True`
+- Switch from simulator to real robot:
+  modify `real_robots/constants.py`, set `USING_OMNIROBOT = True` and `USING_OMNIROBOT_SIMULATOR = False`
+
+### Real Omnirobot
+Omnirobot offers a clean environment for RL: for each RL step, the real robot performs a closed-loop position control to reach the commanded position.
+
+While the robot is moving, `omnirobot_server` is blocked until it receives a message on the topic `finished`, which is sent by the robot. This blocking has a timeout (by default 30s), so if anything unexpected happens, `omnirobot_server` will fail and close.
+
+#### Launch RL on real omnirobot
+To launch an RL experiment on the omnirobot, follow these steps:
+- switch to the real robot (modify constants.py, ensure `USING_OMNIROBOT = True`)
+- set up the ROS environment and comment out `anaconda` in `~/.bashrc`, launch a new terminal and run `python -m real_robots.omnirobot_server`
+- comment out the ROS environment and uncomment `anaconda` in `~/.bashrc`, then launch a new terminal.
+  - If you want to train RL on the real robot, run `python -m rl_baselines.train --env OmnirobotEnv-v0` (other options are customizable).
+  - If you want to replay an RL policy on the real robot (it can have been trained in the simulator), run `python -m replay.enjoy_baselines --log-dir path/to/RL/logs -render`
+
+#### Recording Data of real omnirobot
+To record a dataset with the real robot, follow these steps:
+- switch to the real robot (modify constants.py, ensure `USING_OMNIROBOT = True`)
+- set up the ROS environment and comment out `anaconda` in `~/.bashrc`, launch a new terminal and run `python -m real_robots.omnirobot_server`
+- Change `episodes` to the number of episodes you want in `environments/omnirobot_gym/test_env.py`
+- comment out the ROS environment and uncomment `anaconda` in `~/.bashrc`, launch a new terminal and run `python -m environments.omnirobot_gym.test_env`. Note that you should move the target manually between episodes. **Attention**: you can use a random agent, an agent that always moves toward the target (this increases the proportion of positive rewards in the dataset), or combine both by setting a proportion (`TORWARD_TARGET_PROPORTION`).
+
+
+### Omnirobot Simulator
+The simulator composites real images to produce realistic observations of the environment. It needs several images as input:
+- background image (480x480, undistorted)
+- robot's tag/code, cropped from a real environment image (480x480, undistorted), with a margin of 3 or 4 pixels.
+- target's tag/code, cropped from a real environment image (480x480, undistorted), with a margin of 3 or 4 pixels.
+
+It also needs some important information:
+- margin of the markers
+- path to the camera info file, generated by ROS' camera_calibration package. The camera matrix should correspond to the original image size (e.g. 640x480 in our case).
+
+The details of the inputs above can be found in OmniRobotEnvRender's comments.
+#### Noise of Omnirobot Simulator
+To make the simulator more general, and to make RL/SRL more stable, several types of noise are added to it. The parameters of these noises can be modified at the top of omnirobot_simulator_server.py:
+
+- noise on the robot position and yaw.
+  Gaussian noise, controlled by `NOISE_VAR_ROBOT_POS` and `NOISE_VAR_ROBOT_YAW`.
+- pixel-wise noise on the markers.
+  Gaussian noise applied pixel-wise to the marker images to simulate camera noise, controlled by `NOISE_VAR_TARGET_PIXEL` and `NOISE_VAR_ROBOT_PIXEL`.
+- noise on the environment's luminosity.
+  Gaussian noise applied in the LAB space of the output image, to simulate luminosity changes, controlled by `NOISE_VAR_ENVIRONMENT`.
+- noise on the marker size.
+  Changes the size of the robot's and target's markers proportionally, to simulate position variance on the vertical axis. This kind of noise is controlled by `NOISE_VAR_ROBOT_SIZE_PROPOTION` and `NOISE_VAR_TARGET_SIZE_PROPOTION`.
+#### Train RL on Omnirobot Simulator
+- switch to the simulator (modify constants.py, ensure `USING_OMNIROBOT_SIMULATOR = True`)
+- run `python -m rl_baselines.train --env OmnirobotEnv-v0` directly (other options are customizable).
+
+#### Generate Data from Omnirobot Simulator
+- run `python -m environments.dataset_generator --env OmnirobotEnv-v0` directly (other options are customizable).
+
+### Known issues of Omnirobot
+- Error: `No module named 'scipy.spatial.transform'`: use `pip3 install scipy==1.2` to solve it
+- `omnirobot_server.py` in robotics-rl-srl cannot simply be quit with ctrl-c.
+  - This is because zmq in python2 blocks in a way that prevents even SIGINT from being detected.
+  - To quit the program, you should send SIGKILL to it. This can be done by `kill -9` or `htop`, for example:
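+
+    ```
+    pgrep -f omnirobot_server   # find the server's PID (illustrative)
+    kill -9 <PID>
+    ```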
+- `ImportError: /opt/ros/kinetic/lib/python2.7/dist-packages/cv2.so: undefined symbol: PyCObject_Type` + - You probably run a program expected to run in `conda` environment, sometimes even `~/.bashrc` is changed, and correctly applies `source ~/.bashrc`, the environment still stays with `ros`. + - In this situation, simply re-check the contents in `~/.bashrc`, and open another new terminal to launch the programme. +- stuck at `wait for client to connect` or `waiting to connect server`, there are several possible reasons. + - Port for client and server are not same. Try to use the same one + - Port is occupied by another client/server, you should kill it. If you cannot find the process which occupies this port, use `fuser 7777\tcp -k` to kill it directly. (7777 can be changed to any number of port). + +### TODO + - joint states + - second camera \ No newline at end of file diff --git a/real_robots/constants.py b/real_robots/constants.py index ea202ab95..d2fdce901 100644 --- a/real_robots/constants.py +++ b/real_robots/constants.py @@ -1,3 +1,4 @@ +# coding=utf-8 from __future__ import print_function, absolute_import, division import numpy as np @@ -8,9 +9,11 @@ SERVER_PORT = 7777 HOSTNAME = 'localhost' USING_REAL_BAXTER = False -USING_ROBOBO = True - -assert not (USING_ROBOBO and USING_REAL_BAXTER), "You can only use one real robot at a time" +USING_ROBOBO = False +USING_OMNIROBOT = False +USING_OMNIROBOT_SIMULATOR = True +assert sum([USING_REAL_BAXTER, USING_ROBOBO, USING_OMNIROBOT, USING_OMNIROBOT_SIMULATOR]) <= 1, \ + "You can only use one real robot at a time" # For compatibility with teleop_client Move = None DELTA_POS = 0 @@ -41,7 +44,9 @@ # Set the second cam topic to None if there is only one camera SECOND_CAM_TOPIC = "/camera/rgb/image_raw" DATA_FOLDER_SECOND_CAM = "real_baxter_second_cam" + elif USING_ROBOBO: + # ROS Topics IMAGE_TOPIC = "/camera/rgb/image_raw" # SECOND_CAM_TOPIC = "/camera/image_repub" @@ -70,7 +75,67 @@ class Move(Enum): LEFT = 2 RIGHT = 3 STOP = 4 +elif USING_OMNIROBOT or USING_OMNIROBOT_SIMULATOR: + + # Reward definition + REWARD_TARGET_REACH = 1 + REWARD_NOTHING = 0 + REWARD_BUMP_WALL = -1 + # ROS Topics + IMAGE_TOPIC = "/camera/image_raw" + + SECOND_CAM_TOPIC = None # not support currently + + # Max number of steps per episode + MAX_STEPS = 250 + # Boundaries + MIN_X, MAX_X = -0.85, 0.85 # center of robot should be in this interval + MIN_Y, MAX_Y = -0.85, 0.85 + + # inital position's boudnaries + INIT_MIN_X, INIT_MAX_X = -0.7, 0.7 + INIT_MIN_Y, INIT_MAX_Y = -0.7, 0.7 + + # Target Boundaries + TARGET_MIN_X, TARGET_MAX_X = -0.7, 0.7 + TARGET_MIN_Y, TARGET_MAX_Y = -0.7, 0.7 + + # Control frequence when RL is used for controlling velocity or wheelspeeds directly + RL_CONTROL_FREQ = 20.0 + + # Geometry data of omnirobot + OMNIROBOT_L = 0.120 # m + + # error threshold + DIST_TO_TARGET_THRESHOLD = 0.2 + + # For discrete action, + # Define the possible Moves + class Move(Enum): + FORWARD = 0 + BACKWARD = 1 + LEFT = 2 + RIGHT = 3 + STOP = 4 + + STEP_DISTANCE = 0.1 # meter, distance for each step + + # For continuous action, + # Define the action_bounds + ACTION_POSITIVE_LOW = 0.0 + ACTION_POSITIVE_HIGH = 0.1 + ACTION_NEGATIVE_LOW = -0.1 + ACTION_NEGATIVE_HIGH = 0.0 + + # camera info files (generated by ROS node calibrate_camera) + CAMERA_INFO_PATH = "real_robots/omnirobot_utils/cam_calib_info.yaml" + # camera installation info + CAMERA_POS_COORD_GROUND = [0, 0, 2.9] # camera position in ground coordinate [x, y, z] + # camera rotation in ground coordinate, euler 
angle presented in xyz, in degree + CAMERA_ROT_EULER_COORD_GROUND = [0, 180, 0] + ORIGIN_SIZE = [640, 480] # camera's original resolution + CROPPED_SIZE = [480, 480] # cropped to a square, attention, this is not the output image size (RENDER_SIZE) # Gazebo else: LEFT_ARM_INIT_POS = [0.6, 0.30, 0.20] diff --git a/real_robots/omnirobot_server.py b/real_robots/omnirobot_server.py new file mode 100755 index 000000000..07a494d84 --- /dev/null +++ b/real_robots/omnirobot_server.py @@ -0,0 +1,549 @@ +#!/usr/bin/env python + +from __future__ import division, print_function, absolute_import +import rospy + +import os +import time + + +import numpy as np +import zmq +import argparse +import yaml +import cv2 +from cv_bridge import CvBridge, CvBridgeError +from sensor_msgs.msg import Image +from geometry_msgs.msg import Vector3, PoseStamped, Twist +from std_msgs.msg import Bool +from omnirobot_msgs.msg import WheelSpeeds + + +from tf.transformations import euler_from_quaternion, quaternion_from_euler + +from real_robots.constants import * +from real_robots.utils import sendMatrix +from real_robots.omnirobot_utils.omnirobot_manager_base import OmnirobotManagerBase +assert USING_OMNIROBOT, "Please set USING_OMNIROBOT to True in real_robots/constants.py" + +NO_TARGET_MODE = False + +bridge = CvBridge() +should_exit = [False] + + +class OmniRobot(object): + def __init__(self, init_x, init_y, init_yaw): + """ + Class for controlling omnirobot based on topic mechanism of ros + """ + + super(OmniRobot, self).__init__() + + # Initialize the direction + self.init_pos = [init_x, init_y] + self.init_yaw = init_yaw + + # OmniRobot's real position on the grid + self.robot_pos = [0, 0] + self.robot_yaw = 0 # in rad + + # OmniRobot's position command on the grid + self.robot_pos_cmd = self.init_pos[:] + self.robot_yaw_cmd = self.init_yaw + + # Target's real position on the grid + self.target_pos = [0, 0] + self.target_yaw = 0 + + # status of moving + self.move_finished = False + self.target_pos_changed = False + + # Distance for each step + self.step_distance = STEP_DISTANCE + + self.visual_robot_sub = rospy.Subscriber( + "/visual_robot_pose", PoseStamped, self.visualRobotCallback, queue_size=10) + self.visual_target_sub = rospy.Subscriber( + "/visual_target_pose", PoseStamped, self.visualTargetCallback, queue_size=10) + + self.pos_cmd_pub = rospy.Publisher( + "/position_commands", Vector3, queue_size=10) + self.move_finished_sub = rospy.Subscriber( + "/finished", Bool, self.moveFinishedCallback, queue_size=10) + self.stop_signal_pub = rospy.Publisher("/stop", Bool, queue_size=10) + self.reset_odom_pub = rospy.Publisher( + "/reset_odom", Vector3, queue_size=10) + self.reset_signal_pub = rospy.Publisher("/reset", Bool, queue_size=10) + self.switch_velocity_controller_pub = rospy.Publisher("/switch/velocity_controller", Bool, queue_size=10) + self.switch_pos_controller_pub = rospy.Publisher("/switch/pos_controller", Bool, queue_size=10) + + self.wheel_speeds_command_pub = rospy.Publisher("/wheel_speeds_commands", WheelSpeeds, queue_size=10) + self.velocity_command_pub = rospy.Publisher("/velocity_commands", Twist, queue_size=10) + # known issues, without sleep 1 second, publishers could not been setup + rospy.sleep(1) + # https://answers.ros.org/question/9665/test-for-when-a-rospy-publisher-become-available/?answer=14125#post-id-14125 + + # enable pos_controller, velocity_controller by default + self.enabled_pos_controller = True + self.enabled_velocity_controller = True + 
self.switch_pos_controller_pub.publish(Bool(True)) + self.switch_velocity_controller_pub.publish(Bool(True)) + + def setRobotCmdConstrained(self, x, y, yaw): + """ + set the position command for the robot, the command will be automatically constrained + x, y, yaw are in the global frame + Note: the command will be not published until pubPosCmd() is called + """ + self.robot_pos_cmd[0] = max(x, MIN_X) + self.robot_pos_cmd[0] = min(x, MAX_X) + + self.robot_pos_cmd[1] = max(y, MIN_Y) + self.robot_pos_cmd[1] = min(y, MAX_Y) + self.robot_yaw_cmd = self.normalizeAngle(yaw) + + def setRobotCmd(self, x, y, yaw): + """ + set the position command for the robot + x, y, yaw are in the global frame + Note: the command will be not published until pubPosCmd() is called + """ + self.robot_pos_cmd[0] = x + self.robot_pos_cmd[1] = y + self.robot_yaw_cmd = self.normalizeAngle(yaw) + + def pubPosCmd(self): + """ + Publish the position command for the robot + x, y, yaw are in the global frame + """ + assert self.enabled_pos_controller and self.enabled_velocity_controller, \ + "pos_controller and velocity_controller should be both enabled to execute positional command" + msg = Vector3( + self.robot_pos_cmd[0], self.robot_pos_cmd[1], self.robot_yaw_cmd) + self.pos_cmd_pub.publish(msg) + self.move_finished = False + print("move to x: {:.4f} y:{:.4f} yaw: {:.4f}".format( + msg.x, msg.y, msg.z)) + + def resetOdom(self, x, y, yaw): + """ + The odometry of robot will be reset to x, y, yaw (in global frame) + """ + msg = Vector3() + msg.x = x + msg.y = y + msg.z = yaw + self.reset_odom_pub.publish(msg) + + def reset(self): + """ + Publish the reset signal to robot (quit the stop state) + The odometry will not be reset automatically + """ + msg = Bool() + self.reset_signal_pub.publish(msg) + + def stop(self): + """ + Publish the stop signal to robot + """ + msg = Bool() + msg.data = True + self.stop_signal_pub.publish(msg) + self.move_finished = True + + def visualRobotCallback(self, pose_stamped_msg): + """ + Callback for ROS topic + Get the new updated position of robot from camera + :param pose_stamped_msg: (PoseStamped ROS message) + """ + self.robot_pos[0] = pose_stamped_msg.pose.position.x + self.robot_pos[1] = pose_stamped_msg.pose.position.y + self.robot_yaw = euler_from_quaternion([pose_stamped_msg.pose.orientation.x, pose_stamped_msg.pose.orientation.y, + pose_stamped_msg.pose.orientation.z, pose_stamped_msg.pose.orientation.w])[2] + + if NO_TARGET_MODE and self.target_pos_changed: + # simulate the target's position update + self.target_pos[0] = 0.0 + self.target_pos[1] = 0.0 + self.target_yaw = 0.0 + self.target_pos_changed = False + + def visualTargetCallback(self, pose_stamped_msg): + """ + Callback for ROS topic + Get the new updated position of robot from camera + Only update when target position should have been moved (eg. 
reset env) + :param pose_stamped_msg: (PoseStamped ROS message) + """ + + if self.target_pos_changed: + if pose_stamped_msg.pose.position.x < TARGET_MAX_X and pose_stamped_msg.pose.position.x > TARGET_MIN_X \ + and pose_stamped_msg.pose.position.y > TARGET_MIN_Y and pose_stamped_msg.pose.position.y < TARGET_MAX_Y: + self.target_pos[0] = pose_stamped_msg.pose.position.x + self.target_pos[1] = pose_stamped_msg.pose.position.y + self.target_yaw = euler_from_quaternion([pose_stamped_msg.pose.orientation.x, pose_stamped_msg.pose.orientation.y, + pose_stamped_msg.pose.orientation.z, pose_stamped_msg.pose.orientation.w])[2] + self.target_pos_changed = False + + def moveFinishedCallback(self, move_finished_msg): + """ + Callback for ROS topic + receive 'finished' signal when robot moves to the target + """ + self.move_finished = move_finished_msg.data + + def forward(self): + """ + Move one step forward (Translation) + """ + self.setRobotCmd( + self.robot_pos_cmd[0] + self.step_distance, self.robot_pos_cmd[1], self.robot_yaw_cmd) + self.pubPosCmd() + + def backward(self): + """ + Move one step backward + """ + self.setRobotCmd( + self.robot_pos_cmd[0] - self.step_distance, self.robot_pos_cmd[1], self.robot_yaw_cmd) + self.pubPosCmd() + + def left(self): + """ + Translate to the left + """ + self.setRobotCmd( + self.robot_pos_cmd[0], self.robot_pos_cmd[1] + self.step_distance, self.robot_yaw_cmd) + self.pubPosCmd() + + def right(self): + """ + Translate to the right + """ + self.setRobotCmd( + self.robot_pos_cmd[0], self.robot_pos_cmd[1] - self.step_distance, self.robot_yaw_cmd) + self.pubPosCmd() + + def moveContinous(self, action): + """ + Perform a continuous displacement of dx, dy + """ + self.setRobotCmd( + self.robot_pos_cmd[0] + action[0], self.robot_pos_cmd[1] + action[1], self.robot_yaw_cmd) + self.pubPosCmd() + + def disableVelocityController(self): + """ + Disable the velocity controller, this server send the wheel_speed_command instead + """ + msg = Bool(False) + self.switch_velocity_controller_pub.publish(msg) + self.enabled_velocity_controller = False + + def enableVelocityController(self): + """ + enable the velocity controller + """ + msg = Bool(True) + self.switch_velocity_controller_pub.publish(msg) + self.enabled_velocity_controller = True + + def disablePosController(self): + """ + Disable the pos controller + """ + msg = Bool(False) + self.switch_pos_controller_pub.publish(msg) + self.enabled_pos_controller = False + + def enablePosController(self): + """ + enable the pos controller + """ + msg = Bool(True) + self.switch_pos_controller_pub.publish(msg) + self.enabled_pos_controller = True + + def moveByWheelsCmd(self, left_speed, front_speed, right_speed): + """ + Send wheel speeds command to robot directly + Attention: to use this function, you should firstly + make sure pos_controller and velocity_controller are disabled. + A single wheel speed commands will only be executed within 1 second maximum, + after 1 second, if no new command come in, the robot will brake, thus make + sure this function called by a frequency more than 1Hz. + In contrary, if new wheel speed commands arrives within 1 second, the robot + will execute new command immediately. 
+ + :param left_speed: (float) linear speed of left wheel (meter/s) + :param front_speed: (float) linear speed of front wheel (meter/s) + :param right_speed: (float) linear speed of right wheel (meter/s) + """ + assert not self.enabled_pos_controller and not self.enabled_velocity_controller, \ + "you should disable pos_controller and velocity_controller before controlling wheel speed directly" + wheel_speed_msg = WheelSpeeds() + wheel_speed_msg.stamp = rospy.Time.now() + wheel_speed_msg.left = left_speed + wheel_speed_msg.front = front_speed + wheel_speed_msg.right = right_speed + self.wheel_speeds_command_pub.publish(wheel_speed_msg) + + def moveByVelocityCmd(self, speed_x, speed_y, speed_yaw): + """ + Send a velocity command to the robot directly, the velocity should be expressed in the robot local frame. + positive x: front, positive y: left + Attention: to use this function, make sure pos_controller is disabled first. + A single velocity command is only executed for 1 second at most: + after 1 second without a new command the robot brakes, so make + sure this function is called at a frequency higher than 1 Hz. + Conversely, if a new velocity command arrives within 1 second, the robot + executes the new command immediately. + + :param speed_x: (float) linear speed along x-axis (m/s) (forward-backward), in robot local coordinate + :param speed_y: (float) linear speed along y-axis (m/s) (left-right), in robot local coordinate + :param speed_yaw: (float) rotation speed of robot around z-axis (rad/s), in robot local coordinate + """ + assert not self.enabled_pos_controller and self.enabled_velocity_controller, \ + "you should disable pos_controller but enable velocity_controller before controlling robot velocity directly" + velocity_command_msg = Twist() + velocity_command_msg.linear.x = speed_x + velocity_command_msg.linear.y = speed_y + velocity_command_msg.linear.z = 0 + velocity_command_msg.angular.x = 0 + velocity_command_msg.angular.y = 0 + velocity_command_msg.angular.z = speed_yaw + self.velocity_command_pub.publish(velocity_command_msg) + + + @staticmethod + def normalizeAngle(angle): + """ + :param angle: (float) (in rad) + :return: (float) the angle in [-pi, pi] (in rad) + """ + while angle > np.pi: + angle -= 2 * np.pi + while angle < -np.pi: + angle += 2 * np.pi + return angle + + +class ImageCallback(object): + """ + Image callback for ROS + """ + + def __init__(self, camera_matrix, distortion_coefficients): + super(ImageCallback, self).__init__() + self.valid_img = None + self.valid_box = None + self.first_msg = True + self.camera_matrix = camera_matrix + self.distortion_coefficients = distortion_coefficients + + def imageCallback(self, msg): + try: + # Convert the ROS Image message to OpenCV format + cv2_img = bridge.imgmsg_to_cv2(msg, "rgb8") + + if self.first_msg: + shape = cv2_img.shape + min_length = min(shape[0], shape[1]) + up_margin = int((shape[0] - min_length) / 2) # row + left_margin = int((shape[1] - min_length) / 2) # col + self.valid_box = [up_margin, up_margin + min_length, left_margin, left_margin + min_length] + print("origin size: {}x{}".format(shape[0], shape[1])) + print("crop each image to a square image, cropped size: {}x{}".format(min_length, min_length)) + self.first_msg = False + + undistort_image = cv2.undistort(cv2_img, self.camera_matrix, self.distortion_coefficients) + self.valid_img = undistort_image[self.valid_box[0]:self.valid_box[1], self.valid_box[2]:self.valid_box[3]] + + except CvBridgeError as e: +
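# keep the previous valid_img if conversion or undistortion of this frame fails +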
print("CvBridgeError:", e) + +def saveSecondCamImage(im, episode_folder, episode_step, path="omnirobot_2nd_cam"): + """ + Write an image to disk + :param im: (numpy matrix) BGR image + :param episode_folder: (str) + :param episode_step: (int) + :param path: (str) + """ + image_path = "{}/{}/frame{:06d}.jpg".format( + path, episode_folder, episode_step) + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + cv2.imwrite("srl_zoo/data/{}".format(image_path), im) + + +def waitTargetUpdate(omni_robot, timeout): + """ + Wait for the target position to be updated + :param omni_robot: (OmniRobot) + :param timeout: (float) time to wait for the update (in seconds) + :return: (bool) whether the target position was updated before the timeout + """ + omni_robot.target_pos_changed = True + waited_time = 0.0 # second + while waited_time < timeout and not rospy.is_shutdown(): + if not omni_robot.target_pos_changed: # updated + return True + else: + rospy.sleep(0.1) + waited_time += 0.1 + return False + +class OmnirobotManager(OmnirobotManagerBase): + """ + Omnirobot manager for the real robot + """ + def __init__(self): + super(OmnirobotManager, self).__init__(second_cam_topic=SECOND_CAM_TOPIC) + self.robot = OmniRobot(0, 0, 0) # assign the real robot object to manager + self.episode_idx = 0 + self.episode_step = 0 + + def resetEpisode(self): + """ + Override the original method + Give the correct sequence of commands to the robot + to reset the environment between the different episodes + """ + if self.second_cam_topic is not None: + assert NotImplementedError + episode_folder = "record_{:03d}".format(self.episode_idx) + try: + os.makedirs( + "srl_zoo/data/{}/{}".format(DATA_FOLDER_SECOND_CAM, episode_folder)) + except OSError: + pass + + print('Environment reset, choose random position') + self.episode_idx += 1 + self.episode_step = 0 + self.robot.reset() + + # Env reset + random_init_position = self.sampleRobotInitalPosition() + self.robot.setRobotCmd(random_init_position[0], random_init_position[1], 0) + self.robot.pubPosCmd() + + while True: # check the new target can be seen + if not NO_TARGET_MODE: + raw_input( + "please set the target position, then press 'enter' !") + + if waitTargetUpdate(self.robot, timeout=0.5): + break + else: + print("Can't see the target, please move it into the boundary!") + + +if __name__ == '__main__': + with open(CAMERA_INFO_PATH, 'r') as stream: + try: + contents = yaml.load(stream) + camera_matrix = np.array(contents['camera_matrix']['data']).reshape((3,3)) + distortion_coefficients = np.array( + contents['distortion_coefficients']['data']).reshape((1, 5)) + except yaml.YAMLError as exc: + print(exc) + rospy.init_node('omni_robot_server', anonymous=True) + # warning for no target mode + if NO_TARGET_MODE: + rospy.logwarn( + "ATTENTION: This script is running under NO TARGET mode!!!") + # Connect to ROS Topics + if IMAGE_TOPIC is not None: + image_cb_wrapper = ImageCallback(camera_matrix, distortion_coefficients) + img_sub = rospy.Subscriber( + IMAGE_TOPIC, Image, image_cb_wrapper.imageCallback, queue_size=1) + + if SECOND_CAM_TOPIC is not None: + assert NotImplementedError + image_cb_wrapper_2 = ImageCallback(camera_matrix, distortion_coefficients) + img_2_sub = rospy.Subscriber( + SECOND_CAM_TOPIC, Image, image_cb_wrapper_2.imageCallback, queue_size=1) + + print('Starting up on port number {}'.format(SERVER_PORT)) + context = zmq.Context() + socket = context.socket(zmq.PAIR) + + socket.bind("tcp://*:{}".format(SERVER_PORT)) + + print("Waiting for client...") + socket.send_json({'msg': 'hello'}) + print("Connected to client") + + action = 0 + episode_step = 0 + episode_idx = -1 + episode_folder = None + + omnirobot_manager =
OmnirobotManager() + omnirobot_manager.robot = OmniRobot(0, 0, 0) # yaw is in rad + omnirobot_manager.robot.stop() # after stop, the robot need to be reset + omnirobot_manager.robot.resetOdom(0, 0, 0) + omnirobot_manager.robot.reset() + + omnirobot_manager.robot.pubPosCmd() + r = rospy.Rate(RL_CONTROL_FREQ) + while not rospy.is_shutdown(): + print("wait for new command") + msg = socket.recv_json() + + print("msg: {}".format(msg)) + omnirobot_manager.processMsg(msg) + + # wait for robot to finish the action, timeout 30s + timeout = 30 # second + for i in range(timeout): + readable_list, _, _ = zmq.select([socket], [], [], 0) + + if len(readable_list) > 0: + print("New command incomes, ignore the current command") + continue + if omnirobot_manager.robot.move_finished: + print("action done") + break + + elif i == timeout - 1: + print("Error: timeout for action finished signal") + exit() + time.sleep(1) + + if IMAGE_TOPIC is not None: + # Retrieve last image from image topic + original_image = np.copy(image_cb_wrapper.valid_img) + + print("reward: {}".format(omnirobot_manager.reward)) + print("omni_robot position", omnirobot_manager.robot.robot_pos) + print("target position", omnirobot_manager.robot.target_pos) + socket.send_json( + { + # XYZ position + "position": omnirobot_manager.robot.robot_pos, + "reward": omnirobot_manager.reward, + "target_pos": omnirobot_manager.robot.target_pos + }, + flags=zmq.SNDMORE if IMAGE_TOPIC is not None else 0 + ) + + if SECOND_CAM_TOPIC is not None: + saveSecondCamImage(image_cb_wrapper_2.valid_img, + episode_folder, episode_step, DATA_FOLDER_SECOND_CAM) + episode_step += 1 + + if IMAGE_TOPIC is not None: + # to contiguous, otherwise ZMQ will complain + img = np.ascontiguousarray(original_image, dtype=np.uint8) + sendMatrix(socket, img) + r.sleep() + + print("Exiting server - closing socket...") + socket.close() diff --git a/real_robots/omnirobot_simulator_server.py b/real_robots/omnirobot_simulator_server.py new file mode 100755 index 000000000..879eae4a7 --- /dev/null +++ b/real_robots/omnirobot_simulator_server.py @@ -0,0 +1,446 @@ +from __future__ import division, print_function, absolute_import + +from multiprocessing import Process, Pipe +import yaml +import cv2 +# Konwn issue: - No module named 'scipy.spatial.transform', To resolve, try pip3 install scipy==1.2 +from scipy.spatial.transform import Rotation as R + +from real_robots.constants import * +from real_robots.omnirobot_utils.marker_finder import MakerFinder +from real_robots.omnirobot_utils.marker_render import MarkerRender +from real_robots.omnirobot_utils.omnirobot_manager_base import OmnirobotManagerBase +from real_robots.omnirobot_utils.utils import PosTransformer + +assert USING_OMNIROBOT_SIMULATOR, "Please set USING_OMNIROBOT_SIMULATOR to True in real_robots/constants.py" +NOISE_VAR_ROBOT_POS = 0.01 # meter +NOISE_VAR_ROBOT_YAW = np.pi/180 * 2.5 # 5 Deg +NOISE_VAR_TARGET_PIXEL = 2 # pixel noise on target marker +NOISE_VAR_ROBOT_PIXEL = 2 +NOISE_VAR_ENVIRONMENT = 0.03 # pixel noise of final image on LAB space +NOISE_VAR_ROBOT_SIZE_PROPOTION = 0.05 # noise of robot size propotion +NOISE_VAR_TARGET_SIZE_PROPOTION = 0.05 + + +class OmniRobotEnvRender(): + def __init__(self, init_x, init_y, init_yaw, origin_size, cropped_size, + back_ground_path, camera_info_path, + robot_marker_path, robot_marker_margin, target_marker_path, target_marker_margin, + robot_marker_code, target_marker_code, + robot_marker_length, target_marker_length, output_size, **_): + """ + Class for rendering Omnirobot 
environment + :param init_x: (float) initial x position of robot + :param init_y: (float) initial y position of robot + :param init_yaw: (float) initial yaw position of robot + :param origin_size: (list of int) original camera's size (eg. [640,480]), the camera matrix should be corresponding to this size + :param cropped_size: (list of int) cropped image's size (eg. [480,480]) + :param back_ground_path: (str) back ground image's path, the image should be undistorted. + :param camera_info_path: (str) camera info file's path (containing camera matrix) + :param robot_marker_path: (str) robot maker's path, the marker should have a margin with several pixels + :param robot_marker_margin: (list of int) marker's margin (eg. [3,3,3,3]) + :param target_marker_path: (str) target maker's path, the marker should have a margin with several pixels + :param target_marker_margin: (list of int) marker's margin (eg. [3,3,3,3]) + :param robot_marker_code: (currently not supported, should be "None" by default) (numpy ndarray) optional, the code of robot marker, only used for detecting position directly from the image. + :param target_marker_code: (currently not supported, should be "None" by default) (numpy ndarray) optional, the code of target marker, only used for detecting position directly from the image. + :param robot_marker_length: (float) the physical length of the marker (in meter) + :param target_marker_length: (float) the physical length of the marker (in meter) + :param output_size: (list of int) the output image's size (eg. [224,224]) + :param **_: other input params not used, so they are dropped + """ + super(OmniRobotEnvRender, self).__init__() + + self.output_size = output_size + + # store the size of robot marker + self.robot_marker_size_proprotion = 1.0 + + # Initialize the direction + self.init_pos = [init_x, init_y] + self.init_yaw = init_yaw + + # OmniRobot's real position on the grid + self.robot_pos = np.float32([0, 0]) + self.robot_yaw = 0 # in rad + + # Last velocity command, used for simulating the controlling of velocity directly + self.last_linear_velocity_cmd = np.float32( + [0, 0]) # in m/s, in robot local frame + self.last_rot_velocity_cmd = 0 # in rad/s + + # last wheel speeds command, used for simulating the controlling of wheel speed directly + # [left_speed, front_speed, right_speed] + self.last_wheel_speeds_cmd = np.float32([0, 0, 0]) + + # OmniRobot's position command on the grid + self.robot_pos_cmd = np.float32(self.init_pos[:]) + self.robot_yaw_cmd = self.init_yaw + + # Target's set position on the grid + self.target_pos_cmd = np.float32([0, 0]) + self.target_yaw_cmd = 0.0 + + # Target's real position on the grid + self.target_pos = np.float32([0, 0]) + self.target_yaw = 0 + + # status of moving + self.move_finished = False + self.target_pos_changed = False + + # Distance for each step + self.step_distance = STEP_DISTANCE + + with open(camera_info_path, 'r') as stream: + try: + contents = yaml.load(stream) + camera_matrix = np.array(contents['camera_matrix']['data']) + self.origin_size = np.array( + [contents['image_height'], contents['image_width']]) + self.camera_matrix = np.reshape(camera_matrix, (3, 3)) + self.dist_coeffs = np.array( + contents["distortion_coefficients"]["data"]).reshape((1, 5)) + except yaml.YAMLError as exc: + print(exc) + self.cropped_size = [np.min(self.origin_size), np.min( + self.origin_size)] # size after being cropped + + # restore the image before being cropped + self.bg_img = np.zeros([*self.origin_size, 3], np.uint8) + + 
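# worked example, using the 640x480 calibration shipped in omnirobot_utils/cam_calib_info.yaml: origin_size=[480, 640] and cropped_size=[480, 480], so cropped_margin=[0., 80.] and cropped_range=[0, 480, 80, 560], i.e. a centered 480x480 crop +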
self.cropped_margin = (self.origin_size - self.cropped_size)/2.0 + self.cropped_range = np.array([self.cropped_margin[0], self.cropped_margin[0]+self.cropped_size[0], + self.cropped_margin[1], + self.cropped_margin[1]+self.cropped_size[1]]).astype(np.int) + + back_ground_img = cv2.imread(back_ground_path) + if(back_ground_img.shape[0:2] != self.cropped_size): + print("input back ground image's size: ", back_ground_img.shape) + print("resize to ", self.cropped_size) + self.bg_img[self.cropped_range[0]:self.cropped_range[1], self.cropped_range[2]:self.cropped_range[3], :] \ + = cv2.resize(back_ground_img, tuple(self.cropped_size)) # background image + else: + self.bg_img[self.cropped_range[0]:self.cropped_range[1], self.cropped_range[2]:self.cropped_range[3], :] \ + = back_ground_img # background image + + self.bg_img = cv2.undistort( + self.bg_img, self.camera_matrix, self.dist_coeffs) + # Currently cannot find a solution to re-distort a image... + + self.target_bg_img = self.bg_img # background image with target. + self.image = self.bg_img # image with robot and target + + # camera installation info + r = R.from_euler('xyz', CAMERA_ROT_EULER_COORD_GROUND, degrees=True) + camera_rot_mat_coord_ground = r.as_dcm() + + self.pos_transformer = PosTransformer(self.camera_matrix, self.dist_coeffs, + CAMERA_POS_COORD_GROUND, camera_rot_mat_coord_ground) + + self.target_render = MarkerRender(noise_var=NOISE_VAR_TARGET_PIXEL) + self.robot_render = MarkerRender(noise_var=NOISE_VAR_ROBOT_PIXEL) + self.robot_render.setMarkerImage(cv2.imread( + robot_marker_path, cv2.IMREAD_COLOR), robot_marker_margin) + self.target_render.setMarkerImage(cv2.imread( + target_marker_path, cv2.IMREAD_COLOR), target_marker_margin) + + if robot_marker_code is not None and target_marker_code is not None: + self.marker_finder = MakerFinder(camera_info_path) + self.marker_finder.setMarkerCode( + 'robot', robot_marker_code, robot_marker_length) + self.marker_finder.setMarkerCode( + 'target', target_marker_code, target_marker_length) + + def renderEnvLuminosityNoise(self, origin_image, noise_var=0.1, in_RGB=False, out_RGB=False): + """ + render the different environment luminosity + """ + # variate luminosity and color + origin_image_LAB = cv2.cvtColor( + origin_image, cv2.COLOR_RGB2LAB if in_RGB else cv2.COLOR_BGR2LAB, cv2.CV_32F) + origin_image_LAB[:, :, 0] = origin_image_LAB[:, + :, 0] * (np.random.randn() * noise_var + 1.0) + origin_image_LAB[:, :, 1] = origin_image_LAB[:, + :, 1] * (np.random.randn() * noise_var + 1.0) + origin_image_LAB[:, :, 2] = origin_image_LAB[:, + :, 2] * (np.random.randn() * noise_var + 1.0) + out_image = cv2.cvtColor( + origin_image_LAB, cv2.COLOR_LAB2RGB if out_RGB else cv2.COLOR_LAB2BGR, cv2.CV_8UC3) + return out_image + + def renderTarget(self): + """ + render the target + """ + self.target_bg_img = self.target_render.addMarker(self.bg_img, + self.pos_transformer.phyPosGround2PixelPos( + self.target_pos.reshape(2, 1)), + self.target_yaw, np.random.randn() * NOISE_VAR_TARGET_SIZE_PROPOTION + 1.0) + + def renderRobot(self): + """ + render the image. 
+ """ + self.image = self.robot_render.addMarker(self.target_bg_img, + self.pos_transformer.phyPosGround2PixelPos( + self.robot_pos.reshape(2, 1)), + self.robot_yaw, self.robot_marker_size_proprotion) + + def getCroppedImage(self): + return self.image[self.cropped_range[0]:self.cropped_range[1], self.cropped_range[2]:self.cropped_range[3], :] + + def findMarkers(self): + assert NotImplementedError + # this is not tested + tags_trans_coord_cam, tags_rot_coord_cam = self.marker_finder.getMarkerPose( + self.image, ['robot', 'target'], True) + if 'robot' in tags_trans_coord_cam.keys(): + self.robot_pos = self.pos_transformer.phyPosCam2PhyPosGround( + tags_trans_coord_cam['robot']) + tag_rot_coord_ground = np.matmul( + self.pos_transformer.camera_2_ground_trans[0:3, 0:3], tags_rot_coord_cam['robot'])[0:3, 0:3] + self.robot_yaw = R.from_dcm( + tag_rot_coord_ground).as_euler('zyx', degree=False) + print("robot_error: ". self.robot_pos - self.robot_pos_cmd) + print("robot_yaw_error: ". self.robot_yaw - self.robot_yaw_cmd) + + if 'target' in tags_trans_coord_cam.keys(): + self.target_pos = self.pos_transformer.phyPosCam2PhyPosGround( + tags_trans_coord_cam['target']) + tag_rot_coord_ground = np.matmul(self.pos_transformer.camera_2_ground_trans[0:3, 0:3], + tags_rot_coord_cam['target'])[0:3, 0:3] + self.target_yaw = R.from_dcm( + tag_rot_coord_ground).as_euler('zyx', degree=False) + print("target_error: ", self.target_pos - self.target_pos_cmd) + print("target_yaw_error: ", self.target_yaw - self.target_yaw_cmd) + + def setRobotCmdConstrained(self, x, y, yaw): + self.robot_pos_cmd[0] = max(x, MIN_X) + self.robot_pos_cmd[0] = min(x, MAX_X) + + self.robot_pos_cmd[1] = max(y, MIN_Y) + self.robot_pos_cmd[1] = min(y, MAX_Y) + self.robot_yaw_cmd = self.normalizeAngle(yaw) + + def setRobotCmd(self, x, y, yaw): + self.robot_pos_cmd[0] = x + self.robot_pos_cmd[1] = y + self.robot_yaw_cmd = self.normalizeAngle(yaw) + + self.robot_pos = self.robot_pos_cmd + \ + np.random.randn(2) * NOISE_VAR_ROBOT_POS # add noise + self.robot_yaw = self.normalizeAngle( + self.robot_yaw_cmd + np.random.randn() * NOISE_VAR_ROBOT_YAW) # add noise + + def setTargetCmd(self, x, y, yaw): + self.target_pos_cmd[0] = x + self.target_pos_cmd[1] = y + self.target_yaw_cmd = self.normalizeAngle(yaw) + + self.target_pos = self.target_pos_cmd + self.target_yaw = self.normalizeAngle(self.target_yaw_cmd) + + def forward(self, action=None): + """ + Move one step forward (Translation) + """ + self.setRobotCmd( + self.robot_pos_cmd[0] + self.step_distance, self.robot_pos_cmd[1], self.robot_yaw_cmd) + + def backward(self, action=None): + """ + Move one step backward + """ + self.setRobotCmd( + self.robot_pos_cmd[0] - self.step_distance, self.robot_pos_cmd[1], self.robot_yaw_cmd) + + def left(self, action=None): + """ + Translate to the left + """ + self.setRobotCmd( + self.robot_pos_cmd[0], self.robot_pos_cmd[1] + self.step_distance, self.robot_yaw_cmd) + + def right(self, action=None): + """ + Translate to the right + """ + self.setRobotCmd( + self.robot_pos_cmd[0], self.robot_pos_cmd[1] - self.step_distance, self.robot_yaw_cmd) + + def moveContinous(self, action): + """ + Perform a continuous displacement of dx, dy + """ + self.setRobotCmd( + self.robot_pos_cmd[0] + action[0], self.robot_pos_cmd[1] + action[1], self.robot_yaw_cmd) + + def moveByVelocityCmd(self, speed_x, speed_y, speed_yaw): + """ + simuate the robot moved by velocity command + This function is assumed to be called at a frequency RL_CONTROL_FREQ in the simulation world + + 
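Note: the command takes effect with a one-control-period delay: the pose is first advanced using the previously stored velocity command, then the new command is stored for the next call. +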
:param speed_x: (float) linear speed along x-axis (m/s) (forward-backward), in robot local coordinate + :param speed_y: (float) linear speed along y-axis (m/s) (left-right), in robot local coordinate + :param speed_yaw: (float) rotation speed of robot around z-axis (rad/s), in robot local coordinate + """ + # calculate the robot position that it should be at this moment, so it should be driven by last command + # Assume in 1/RL_CONTROL_FREQ, the heading remains the same (not true, + # but should be approximately work if RL_CONTROL_FREQ is high enough) + # translate the last velocity cmd in robot local coordiante to position cmd in gound coordiante + cos_direction = np.cos(self.robot_yaw) + sin_direction = np.sin(self.robot_yaw) + + ground_pos_cmd_x = self.robot_pos[0] + (self.last_linear_velocity_cmd[0] * + cos_direction - self.last_linear_velocity_cmd[1] * sin_direction)/RL_CONTROL_FREQ + ground_pos_cmd_y = self.robot_pos[1] + (self.last_linear_velocity_cmd[1] * + cos_direction + self.last_linear_velocity_cmd[0] * sin_direction)/RL_CONTROL_FREQ + ground_yaw_cmd = self.robot_yaw + self.last_rot_velocity_cmd/RL_CONTROL_FREQ + self.setRobotCmd(ground_pos_cmd_x, ground_pos_cmd_y, ground_yaw_cmd) + + # save the command of this moment + self.last_linear_velocity_cmd[0] = speed_x + self.last_linear_velocity_cmd[1] = speed_y + self.last_rot_velocity_cmd = speed_yaw + + def moveByWheelsCmd(self, left_speed, front_speed, right_speed): + """ + simuate the robot moved by wheel speed command + This function is assumed to be called at a frequency RL_CONTROL_FREQ in the simulation world + + :param left_speed: (float) linear speed of left wheel (meter/s) + :param front_speed: (float) linear speed of front wheel (meter/s) + :param right_speed: (float) linear speed of right wheel (meter/s) + """ + + # calculate the robot position by omnirobot's kinematic equations + # Assume in 1/RL_CONTROL_FREQ, the heading remains the same (not true, + # but should be approximately work if RL_CONTROL_FREQ is high enough) + + # translate the last wheel speeds cmd in last velocity cmd + local_speed_x = self.last_wheel_speeds_cmd[0] / np.sqrt(3.0) \ + - self.last_wheel_speeds_cmd[2] / np.sqrt(3.0) + local_speed_y = - self.last_wheel_speeds_cmd[1] / 1.5 + \ + self.last_wheel_speeds_cmd[0] / 3.0 + \ + self.last_wheel_speeds_cmd[2] / 3.0 + local_rot_speed = - self.last_wheel_speeds_cmd[1] / (3.0 * OMNIROBOT_L) \ + - self.last_wheel_speeds_cmd[0] / (3.0 * OMNIROBOT_L) \ + - self.last_wheel_speeds_cmd[2] / (3.0 * OMNIROBOT_L) + + # translate the last velocity cmd in robot local coordiante to position cmd in gound coordiante + cos_direction = np.cos(self.robot_yaw) + sin_direction = np.sin(self.robot_yaw) + + ground_pos_cmd_x = self.robot_pos[0] + (local_speed_x * + cos_direction - local_speed_y * sin_direction)/RL_CONTROL_FREQ + ground_pos_cmd_y = self.robot_pos[1] + (local_speed_y * + cos_direction + local_speed_x * sin_direction)/RL_CONTROL_FREQ + ground_yaw_cmd = self.robot_yaw + local_rot_speed/RL_CONTROL_FREQ + self.setRobotCmd(ground_pos_cmd_x, ground_pos_cmd_y, ground_yaw_cmd) + + self.last_wheel_speeds_cmd = np.float32( + [left_speed, front_speed, right_speed]) + + @staticmethod + def normalizeAngle(angle): + """ + :param angle: (float) (in rad) + :return: (float) the angle in [-pi, pi] (in rad) + """ + while angle > np.pi: + angle -= 2 * np.pi + while angle < -np.pi: + angle += 2 * np.pi + return angle + + +class OmniRobotSimulatorSocket(OmnirobotManagerBase): + def __init__(self, **args): + ''' + Simulate the zmq 
socket like real omnirobot server + :param **args arguments + + ''' + super(OmniRobotSimulatorSocket, self).__init__() + defalt_args = { + "back_ground_path": "real_robots/omnirobot_utils/back_ground.jpg", + "camera_info_path": CAMERA_INFO_PATH, + "robot_marker_path": "real_robots/omnirobot_utils/robot_margin3_pixel_only_tag.png", + "robot_marker_margin": [3, 3, 3, 3], + # for black target, use target_margin4_pixel.png", + "target_marker_path": "real_robots/omnirobot_utils/red_target_margin4_pixel_480x480.png", + "target_marker_margin": [4, 4, 4, 4], + "robot_marker_code": None, + "target_marker_code": None, + "robot_marker_length": 0.18, + "target_marker_length": 0.18, + "output_size": [224, 224], + "init_x": 0, + "init_y": 0, + "init_yaw": 0, + "origin_size": ORIGIN_SIZE, + "cropped_size": CROPPED_SIZE + } + # overwrite the args if it exists + self.new_args = {**defalt_args, **args} + + assert len(self.new_args['robot_marker_margin']) == 4 + assert len(self.new_args['target_marker_margin']) == 4 + assert len(self.new_args['output_size']) == 2 + + self.robot = OmniRobotEnvRender(**self.new_args) + self.episode_idx = 0 + self._random_target = self.new_args["random_target"] + self.resetEpisode() # for a random target initial position + + def resetEpisode(self): + """ + override the original method + Give the correct sequance of commands to the robot + to rest environment between the different episodes + """ + if self.second_cam_topic is not None: + assert NotImplementedError + # Env reset + random_init_position = self.sampleRobotInitalPosition() + self.robot.setRobotCmd( + random_init_position[0], random_init_position[1], 0) + + self.robot_marker_size_proprotion = np.random.randn( + ) * NOISE_VAR_ROBOT_SIZE_PROPOTION + 1.0 + + # target reset + if self._random_target or self.episode_idx == 0: + random_init_x = np.random.random_sample() * (TARGET_MAX_X - TARGET_MIN_X) + \ + TARGET_MIN_X + random_init_y = np.random.random_sample() * (TARGET_MAX_Y - TARGET_MIN_Y) + \ + TARGET_MIN_Y + self.robot.setTargetCmd( + random_init_x, random_init_y, 2 * np.pi * np.random.rand() - np.pi) + + # render the target and robot + self.robot.renderTarget() + self.robot.renderRobot() + + def send_json(self, msg): + # env send msg to render + self.processMsg(msg) + + self.robot.renderRobot() + + self.img = self.robot.getCroppedImage() + self.img = self.robot.renderEnvLuminosityNoise(self.img, noise_var=NOISE_VAR_ENVIRONMENT, in_RGB=False, + out_RGB=True) + self.img = cv2.resize(self.img, tuple(self.robot.output_size)) + + def recv_json(self): + msg = { + # XYZ position + "position": self.robot.robot_pos.tolist(), + "reward": self.reward, + "target_pos": self.robot.target_pos.tolist() + } + return msg + + def recv_image(self): + return self.img diff --git a/real_robots/omnirobot_utils/__init__.py b/real_robots/omnirobot_utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/real_robots/omnirobot_utils/back_ground.jpg b/real_robots/omnirobot_utils/back_ground.jpg new file mode 100644 index 000000000..d096df214 Binary files /dev/null and b/real_robots/omnirobot_utils/back_ground.jpg differ diff --git a/real_robots/omnirobot_utils/cam_calib_info.yaml b/real_robots/omnirobot_utils/cam_calib_info.yaml new file mode 100644 index 000000000..632acdb63 --- /dev/null +++ b/real_robots/omnirobot_utils/cam_calib_info.yaml @@ -0,0 +1,20 @@ +image_width: 640 +image_height: 480 +camera_name: head_camera +camera_matrix: + rows: 3 + cols: 3 + data: [513.251745, 0.000000, 325.813473, 0.000000, 514.199291, 
251.889606, 0.000000, 0.000000, 1.000000] +distortion_model: plumb_bob +distortion_coefficients: + rows: 1 + cols: 5 + data: [0.080411, -0.130679, 0.004315, 0.000615, 0.000000] +rectification_matrix: + rows: 3 + cols: 3 + data: [1.000000, 0.000000, 0.000000, 0.000000, 1.000000, 0.000000, 0.000000, 0.000000, 1.000000] +projection_matrix: + rows: 3 + cols: 4 + data: [-502.430063, 12.432139, 342.039831, 8.468118, -504.470966, 270.715015, 0.032387, 0.037207, 0.998782, 0.000000, 1.000000, 0.000000] diff --git a/real_robots/omnirobot_utils/marker_finder.py b/real_robots/omnirobot_utils/marker_finder.py new file mode 100644 index 000000000..ad4ac0a7f --- /dev/null +++ b/real_robots/omnirobot_utils/marker_finder.py @@ -0,0 +1,386 @@ +import numpy as np +import cv2 +from os.path import join +import yaml +import argparse + + +def rotateMatrix90(matrix): + """ + + :param matrix: (array) matrix to rotate of 90 degrees + :return: (array) + """ + new_matrix = np.transpose(matrix) + new_matrix = np.flip(new_matrix, axis=1) + return new_matrix + + +def hammingDistance(string_1, string_2): + """ + + :param string_1: (str) + :param string_2: (str) + :return: (int) Hamming distance between string_1 & string_2 + """ + assert len(string_1) == len(string_2) + return sum(ch1 != ch2 for ch1, ch2 in zip(string_1, string_2)) + + +class MakerFinder(): + """ + Find the exact physical position of marker in the coordinate of camera. + """ + def __init__(self, camera_info_path): + """ + + :param camera_info_path: (str) + """ + self.min_area = 70 + self.marker_code = {} + with open(camera_info_path, 'r') as stream: + try: + contents = yaml.load(stream) + camera_matrix = np.array(contents['camera_matrix']['data']) + self.origin_size = np.array([contents['image_width'], contents['image_height']]) + self.camera_matrix = np.reshape(camera_matrix, (3, 3)) + self.distortion_coefficients = np.array(contents['distortion_coefficients']['data']) + except yaml.YAMLError as exc: + print(exc) + self.marker_img = None + + def setMarkerCode(self, marker_id, marker_code, real_length): + """ + TODO + :param marker_id: + :param marker_code: + :param real_length: (int) + :return: + """ + self.marker_code[marker_id] = np.zeros((4, marker_code.shape[0], marker_code.shape[1])) + + self.marker_code[marker_id][0, :, :] = marker_code + for i in range(1, 4): + self.marker_code[marker_id][i, :, :] = rotateMatrix90(self.marker_code[marker_id][i-1, :, :]) + self.marker_rows, self.marker_cols = 90, 90 + self.marker_square_pts = np.float32([[0, 0], [0, self.marker_cols], [self.marker_rows, self.marker_cols], + [self.marker_rows, 0]]).reshape(-1, 1, 2) + self.marker_real_corners = np.float32([[0, 0, 0], [0, real_length, 0], [real_length, real_length, 0], + [real_length, 0, 0]]) \ + - np.float32([real_length/2.0, real_length/2.0, 0]) + self.marker_real_corners = self.marker_real_corners.reshape(-1, 1, 3) + + def intersection(self, l1, l2): + """ + TODO + :param l1: + :param l2: + :return: + """ + vx = l1[0] + vy = l1[1] + ux = l2[0] + uy = l2[1] + wx = l2[2]-l1[2] + wy = l2[3]-l1[3] + + tmp = vx * uy - vy * ux + if tmp == 0: + tmp = 1 + + s = (vy * wx - vx * wy) / tmp + px = l2[2]+s*ux + py = l2[3]+s*uy + + return px, py + + def checkBorder(self, contour, width, height): + """ + TODO + :param contour: + :param width: + :param height: + :return: + """ + ret = True + for pt in contour: + pt = pt[0] + if (pt[0] <= 1) or (pt[0] >= width-2) or (pt[1] <= 1) or (pt[1] >= height-2): + ret = False + return ret + + def labelSquares(self, img, visualise: bool): + 
""" + TODO + :param img: + :param visualise: (bool) + :return: + """ + self.gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + self.edge = cv2.adaptiveThreshold(self.gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 31, 5) + + _, cnts, _ = cv2.findContours(self.edge, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE) + + candidate_contours = [] + candidate_approx = [] + + for contour in cnts: + if len(contour) < 50: # filter the short contour + continue + approx = cv2.approxPolyDP(contour, cv2.arcLength(contour, True) * 0.035, True) + if len(approx) == 4 and self.checkBorder(approx, self.gray.shape[1], self.gray.shape[0]) \ + and abs(cv2.contourArea(approx)) > self.min_area and cv2.isContourConvex(approx): + cv2.drawContours(img, approx, -1, (0, 255, 0), 3) + candidate_approx.append(approx) + candidate_contours.append(contour) + + n_blob = len(candidate_approx) + self.blob_corners = np.zeros((n_blob, 4, 2), np.float32) + + for i in range(n_blob): + # find how much points are in each line (length) + fitted_lines = np.zeros((4, 4), np.float) + for j in range(4): + pt0 = candidate_approx[i][j] + pt1 = candidate_approx[i][(j + 1) % 4] + k0 = -1 + k1 = -1 + # find corresponding approximated point (pt0, pt1) in contours + for k in range(len(candidate_contours[i])): + pt2 = candidate_contours[i][k] + if pt2[0][0] == pt0[0][0] and pt2[0][1] == pt0[0][1]: + k0 = k + if pt2[0][0] == pt1[0][0] and pt2[0][1] == pt1[0][1]: + k1 = k + + # compute how much points are in this line + if k1 >= k0: + length = k1 - k0 - 1 + else: + length = len(candidate_contours[i]) - k0 + k1 - 1 + + if length == 0: + length = 1 + + line_pts = np.zeros((1, length, 2), np.float32) + # append this line's point to array 'line_pts' + for l in range(length): + ll = (k0 + l + 1) % len(candidate_contours[i]) + line_pts[0, l, :] = candidate_contours[i][ll] + + # Fit edge and put to vector of edges + [vx, vy, x, y] = cv2.fitLine(line_pts, cv2.DIST_L2, 0, 0.01, 0.01) + fitted_lines[j, :] = [vx, vy, x, y] + if visualise: + # Finally draw the line + # Now find two extreme points on the line to draw line + left = [0, 0] + right = [0, 0] + length = 100 + left[0] = x - vx * length + left[1] = y - vy * length + right[0] = x + vx * length + right[1] = y + vy * length + cv2.line(img, tuple(left), tuple(right), 255, 2) + + # Calculated four intersection points + for j in range(4): + intc = self.intersection(fitted_lines[j, :], fitted_lines[(j+1) % 4, :]) + self.blob_corners[i, j, :] = intc + + if visualise: + for j in range(4): + intc = tuple(self.blob_corners[i, j, :]) + if j == 0: + cv2.circle(img, intc, 5, (255, 255, 255)) + if j == 1: + cv2.circle(img, intc, 5, (255, 0, 0)) + if j == 2: + cv2.circle(img, intc, 5, (0, 255, 0)) + if j == 3: + cv2.circle(img, intc, 5, (0, 0, 255)) + cv2.imshow('frame', img) + cv2.waitKey(0) + cv2.destroyAllWindows() + + def decode(self, transformed_img): + """ + TODO + :param transformed_img: + :return: + """ + num_step = 9 + code = np.zeros((num_step, num_step), np.uint8) + step = int(transformed_img.shape[0] / num_step) + for i in range(num_step): + for j in range(num_step): + if np.average(transformed_img[i * step: i * step + step, j * step: j * step + step]) < 100: + code[i, j] = 1 + else: + code[i, j] = 0 + return code + + def rotateCorners(self, corner, i): + """ + TODO + :param corner: + :param i: + :return: + """ + return np.array([corner[i % 4, :], corner[(i + 1) % 4, :], corner[(i + 2) % 4, :], corner[(i + 3) % 4, :]]) + + def setMarkerImg(self, img): + """ + + :param img: + :return: + """ + 
self.marker_img = img + + def findMarker(self, marker_id, visualise=False): + """ + TODO + :param marker_id: + :param visualise: (bool) + :return: + """ + + print("find marker_id: ", marker_id) + for i_square in range(self.blob_corners.shape[0]): + # create marker's mask + self.marker_mask = np.zeros(self.gray.shape[0: 3], np.uint8) + cv2.fillConvexPoly(self.marker_mask, + self.blob_corners[i_square, :, :].astype(np.int32).reshape(-1, 1, 2), 255) + + H, _ = cv2.findHomography(self.blob_corners[i_square, :, :], self.marker_square_pts, cv2.LMEDS) + + marker_transformed = cv2.warpPerspective(self.edge, H, + self.gray.shape[0:2])[0: self.marker_rows, 0: self.marker_cols] + if visualise: + cv2.imshow('frame', marker_transformed) + cv2.imshow('edge', self.edge) + cv2.waitKey(0) + cv2.destroyAllWindows() + code = self.decode(marker_transformed) + distance = np.zeros(4) + + for i_rotate in range(4): # determinate 0, 90, 180, 270 degree + distance[i_rotate] = hammingDistance(code.reshape((-1, )), + self.marker_code[marker_id][i_rotate, :, :].reshape((-1,))) + dist_min_arg = np.argmin(distance) + + if visualise: + print("minimum hangming distance: ", distance[dist_min_arg]) + print(code) + + if distance[dist_min_arg] < 3: + # find the correct marker! + # rotate the corners to the correct angle + self.blob_corners[i_square, :, :] = self.rotateCorners(self.blob_corners[i_square, :, :], dist_min_arg) + + # compute the correct H + criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001) + self.blob_corners[i_square, :, :] = cv2.cornerSubPix(self.gray, self.blob_corners[i_square, :, :], + (11, 11), (-1, -1), criteria) + + # Find the rotation and translation vectors. (camera to tag) + retval, rot_vec, trans_vec = cv2.solvePnP(self.marker_real_corners, self.blob_corners[i_square, :, :], + self.camera_matrix.astype(np.float32), + self.distortion_coefficients.astype(np.float32)) + + rot_mat, _ = cv2.Rodrigues(rot_vec) + # reverse the rotation and translation (tag to camera) + tag_trans_coord_cam = - trans_vec + tag_rot_coord_cam = np.linalg.inv(rot_mat) + + if visualise and self.marker_img is not None: + marker_img_corner = np.float32([[0, 0], [self.marker_img.shape[0], 0], + [self.marker_img.shape[0], self.marker_img.shape[1]], + [0, self.marker_img.shape[1]]]).reshape(-1, 1, 2) + + H, _ = cv2.findHomography(marker_img_corner, self.blob_corners[i_square, :, :]) + reconst = cv2.warpPerspective(self.marker_img, H, self.gray.shape) + cv2.imshow('frame', reconst) + cv2.waitKey(0) + cv2.destroyAllWindows() + return tag_trans_coord_cam, tag_rot_coord_cam + raise ValueError("Not Found Tag") + + def getMarkerPose(self, img, marker_ids, visualise=False): + """ + TODO + :param img: + :param marker_ids: + :param visualise: (bool) + :return: + """ + self.labelSquares(img, visualise) + if isinstance(marker_ids, list): + pos_dict = {} + rot_dict = {} + for i in marker_ids: + pos, rot = self.findMarker(i, visualise) + pos_dict[i] = pos + rot_dict[i] = rot + return pos_dict, rot_dict + else: + return self.findMarker(marker_ids, visualise) + + +def transformPosCamToGround(pos_coord_cam): + """ + TODO : return pos_coord_ground ? 
+ Transform + :param pos_coord_cam: + :return: (array) + """ + pos_coord_ground = pos_coord_cam + pos_coord_ground[0] = pos_coord_ground[1] + pos_coord_ground[1] = - pos_coord_ground[0] + pos_coord_ground[2] = 0 + return pos_coord_cam + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Script for finding a marker in a dataset") + parser.add_argument('--camera-info-path', type=str, default=None, + help='Path to camera calibration config file (ros file).') + parser.add_argument('--path-to-data', type=str, default=None, + help="Path to the dataset.") + + # Ignore unknown args for now + args, unknown = parser.parse_known_args() + assert args.camera_info_path is not None and args.path_to_data is not None, \ + "You should specify both the camera config file and dataset path to find marker position !" + + # example, find the markers in the observation image, and get it's position in the ground + # Originally: + # camera_info_path = + # "/home/gaspard/Documents/ros_omnirobot/catkin_ws/src/omnirobot-dream/omnirobot_remote/cam_calib_info.yaml" + # path = "/home/gaspard/Documents/ros_omnirobot/robotics-rl-srl-omnirobot/data/omnirobot_real_20190125_155902/" + camera_info_path = args.camera_info_path + path = args.path_to_data + + cropped_img = cv2.imread(join(path, "record_008/frame000015.jpg")) + + img = np.zeros((480, 640, 3), np.uint8) + img[0:480, 80:560, :] = cv2.resize(cropped_img, (480, 480)) + robot_tag_img = cv2.imread("robot_tag.png") + robot_tag_code = np.array([ + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 1, 0, 1, 1, 0, 0], + [0, 0, 1, 1, 0, 1, 1, 0, 0], + [0, 0, 1, 0, 1, 0, 1, 0, 0], + [0, 0, 1, 1, 1, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.uint8) + + maker_finder = MakerFinder(camera_info_path) + maker_finder.setMarkerCode('robot', robot_tag_code, 0.18) # marker size 0.18m * 0.18m + maker_finder.setMarkerImg(robot_tag_img) + pos_coord_cam, rot_coord_cam = maker_finder.getMarkerPose(img, 'robot', False) + + pos_coord_ground = transformPosCamToGround(pos_coord_cam) + print("position in the ground: ", pos_coord_cam) diff --git a/real_robots/omnirobot_utils/marker_render.py b/real_robots/omnirobot_utils/marker_render.py new file mode 100644 index 000000000..5d12a6642 --- /dev/null +++ b/real_robots/omnirobot_utils/marker_render.py @@ -0,0 +1,185 @@ +import cv2 +import numpy as np +from matplotlib import pyplot as plt + + +class MarkerRender(object): + """ + Add marker image to the original image. By this way, we can simulate the target and the robot. 
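+ The marker image (with a margin of a few pixels) is rotated, scaled and blended onto the background at a given pixel position, with optional Gaussian pixel noise.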
+ """ + + def __init__(self, noise_var): + super(MarkerRender, self).__init__() + self.marker_image_with_margin = None # need to read from file + self.margin = [0.0, 0.0, 0.0, 0.0] + + self.origin_image_shape = None + self.marker_image_transformed = None + self.marker_weight_transformed = None + self.bg_weight = None # back ground mask, inverse of marker_mask + + self.noise_var = noise_var + + self.initialized = False + self.roi_half_length = None + self.roi_length = None + self.marker_yaw = None + self.marker_pixel_pos = None + self.maker_scale = None + self.M_marker_with_margin = None + self.marker_pixel_pos_fraction = None + self.marker_pixel_pos_interger = None + self.marker_weight = None + + def setMarkerImage(self, marker_image_with_margin, margin): + """ + + :param marker_image_with_margin: + :param margin: + :return: + """ + if margin is np.ndarray: + margin = list(margin) + + assert len(margin) == 4, "margin should be a list of 4 numbers" + self.marker_image_with_margin = marker_image_with_margin + self.margin = margin + + self.roi_length = int(np.linalg.norm(self.marker_image_with_margin.shape) + 4) + if self.roi_length % 2 == 1: + self.roi_length += 1 # ensure its even + self.roi_half_length = self.roi_length // 2 + + # setup margin's weight + self.marker_weight = np.ones(self.marker_image_with_margin.shape[0: 3], np.float32) + for i in range(self.margin[0]): + self.marker_weight[i, :] = i / self.margin[0] * 0.3 + for i in range(self.margin[1]): + self.marker_weight[:, i] = i / self.margin[1] * 0.3 + for i in range(self.margin[2]): + self.marker_weight[-i - 1, :] = i / self.margin[2] * 0.3 + for i in range(self.margin[3]): + self.marker_weight[:, -i - 1] = i / self.margin[3] * 0.3 + + def transformMarkerImage(self, marker_pixel_pos, marker_yaw, maker_scale): + """ + + :param marker_pixel_pos: + :param marker_yaw: + :param maker_scale: + :return: + """ + self.marker_yaw = marker_yaw + self.marker_pixel_pos = np.float32(marker_pixel_pos).reshape((2, )) + self.maker_scale = maker_scale + self.M_marker_with_margin = cv2.getRotationMatrix2D((self.marker_image_with_margin.shape[1] / 2, + self.marker_image_with_margin.shape[0] / 2), + self.marker_yaw * 180 / np.pi, self.maker_scale) + self.marker_pixel_pos_fraction, self.marker_pixel_pos_interger = np.modf(self.marker_pixel_pos) + + self.marker_pixel_pos_interger = self.marker_pixel_pos_interger.astype(np.int) + self.M_marker_with_margin[0, 2] += \ + self.marker_pixel_pos_fraction[1] + self.roi_half_length - self.marker_image_with_margin.shape[0] / 2 + self.M_marker_with_margin[1, 2] += \ + self.marker_pixel_pos_fraction[0] + self.roi_half_length - self.marker_image_with_margin.shape[1] / 2 + + self.marker_image_transformed = cv2.warpAffine(self.marker_image_with_margin, self.M_marker_with_margin, + (self.roi_length, self.roi_length)) + + self.marker_weight_transformed = cv2.warpAffine(self.marker_weight, self.M_marker_with_margin, + (self.roi_length, self.roi_length)) # white: Marker part + + self.bg_weight = 1.0 - self.marker_weight_transformed # white: origin image part + + def generateNoise(self): + """ + + :return: + """ + mean = 0.0 + noise = np.random.standard_normal(self.marker_image_with_margin.shape) * self.noise_var + mean + noise = np.around(noise, 0) + + return cv2.warpAffine(noise, self.M_marker_with_margin, (self.roi_length, self.roi_length)) + + def checkBoxIndex(self, box_index): + """ + + :param box_index:[x_min, x_max, y_min, y_max] + :return: + """ + relative_index = [0, box_index[1] - box_index[0], 0, box_index[3] 
- box_index[2]] + if box_index[0] < 0: + relative_index[0] = relative_index[0] - box_index[0] + box_index[0] = 0 + if box_index[1] > self.origin_image_shape[0]: + + relative_index[1] = relative_index[1] - (box_index[1] - self.origin_image_shape[0]) + box_index[1] = self.origin_image_shape[0] + if box_index[2] < 0: + relative_index[2] = relative_index[2] - box_index[2] + box_index[2] = 0 + if box_index[3] > self.origin_image_shape[1]: + relative_index[3] = relative_index[3] - (box_index[3] - self.origin_image_shape[1]) + box_index[3] = self.origin_image_shape[1] + return box_index, relative_index + + def addMarker(self, origin_image, marker_pixel_pos=None, marker_yaw=None, maker_scale=None): + """ + + :param origin_image: + :param marker_pixel_pos: + :param marker_yaw: + :param maker_scale: + :return: + """ + + if not self.initialized: + self.origin_image_shape = origin_image.shape + self.initialized = True + + if marker_pixel_pos is not None: + # set Marker pixel position + self.transformMarkerImage(marker_pixel_pos, marker_yaw, maker_scale) + + noise = self.generateNoise() + # combine noise, target, back_ground images togethor + processed_image = origin_image.copy() + roi_area = [self.marker_pixel_pos_interger[1] - self.roi_half_length, + self.marker_pixel_pos_interger[1] + self.roi_half_length, + self.marker_pixel_pos_interger[0] - self.roi_half_length, + self.marker_pixel_pos_interger[0] + self.roi_half_length] + + try: + processed_image[roi_area[0]: roi_area[1], roi_area[2]: roi_area[3], :] = \ + ((noise + self.marker_image_transformed) * self.marker_weight_transformed)\ + + origin_image[roi_area[0]: roi_area[1], roi_area[2]: roi_area[3], :] * self.bg_weight + except ValueError: + roi_area, relative_index = self.checkBoxIndex(roi_area) + processed_image[roi_area[0]: roi_area[1], roi_area[2]: roi_area[3], :] = \ + ((noise + self.marker_image_transformed) * + self.marker_weight_transformed)[ + relative_index[0]: relative_index[1], relative_index[2]: relative_index[3], :] + \ + origin_image[roi_area[0]: roi_area[1], roi_area[2]: roi_area[3], :] \ + * self.bg_weight[relative_index[0]: relative_index[1], relative_index[2]:relative_index[3], :] + return processed_image.astype(np.uint8) + + +if __name__ == "__main__": + # example, add the Marker image to the observation image + origin_image = cv2.imread("omnirobot_utils/back_ground.jpg", cv2.IMREAD_COLOR) + origin_image = cv2.resize(origin_image, (480, 480)) + marker_image_with_margin = cv2.imread("omnirobot_utils/robot_margin3_pixel_only_tag.png", cv2.IMREAD_COLOR) + plt.imshow(origin_image) + plt.show() + marker_render = MarkerRender(noise_var=1.0) + marker_render.setMarkerImage(marker_image_with_margin, [3, 3, 3, 3]) + + result = marker_render.addMarker(origin_image, marker_pixel_pos=[470, 300], marker_yaw=0, maker_scale=1.0) + # row, col, angle(rad) + + plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB)) + plt.show() + cv2.imshow("result", result) + cv2.waitKey(0) + cv2.destroyAllWindows() diff --git a/real_robots/omnirobot_utils/omnirobot_manager_base.py b/real_robots/omnirobot_utils/omnirobot_manager_base.py new file mode 100644 index 000000000..c7ca56e17 --- /dev/null +++ b/real_robots/omnirobot_utils/omnirobot_manager_base.py @@ -0,0 +1,158 @@ +from __future__ import division, print_function, absolute_import + +from real_robots.constants import * + + +class OmnirobotManagerBase(object): + def __init__(self): + """ + This class is the basic class for omnirobot server, and omnirobot simulator's server. 
+ This class takes the omnirobot position at instant t and the action at instant t, + to determine the position the robot should reach at instant t+1, and the immediate reward it gets at instant t + """ + super(OmnirobotManagerBase, self).__init__() + self.second_cam_topic = SECOND_CAM_TOPIC + self.episode_idx = 0 + + # the abstract object for robot, + # can be the real robot (Omnirobot class) + # or the robot simulator (OmniRobotEnvRender class) + self.robot = None + + def rightAction(self): + """ + Let the robot execute the right action, checking the boundary + :return has_bumped: (bool) + """ + if self.robot.robot_pos[1] - STEP_DISTANCE > MIN_Y: + self.robot.right() + has_bumped = False + else: + has_bumped = True + return has_bumped + + def leftAction(self): + """ + Let the robot execute the left action, checking the boundary + :return has_bumped: (bool) + """ + if self.robot.robot_pos[1] + STEP_DISTANCE < MAX_Y: + self.robot.left() + has_bumped = False + else: + has_bumped = True + return has_bumped + + def forwardAction(self): + """ + Let the robot execute the forward action, checking the boundary + :return has_bumped: (bool) + """ + if self.robot.robot_pos[0] + STEP_DISTANCE < MAX_X: + self.robot.forward() + has_bumped = False + else: + has_bumped = True + return has_bumped + + def backwardAction(self): + """ + Let the robot execute the backward action, checking the boundary + :return has_bumped: (bool) + """ + if self.robot.robot_pos[0] - STEP_DISTANCE > MIN_X: + self.robot.backward() + has_bumped = False + else: + has_bumped = True + return has_bumped + + def moveContinousAction(self, msg): + """ + Let the robot execute a continuous action, checking the boundary + :return has_bumped: (bool) + """ + if MIN_X < self.robot.robot_pos[0] + msg['action'][0] < MAX_X and \ + MIN_Y < self.robot.robot_pos[1] + msg['action'][1] < MAX_Y: + self.robot.moveContinous(msg['action']) + has_bumped = False + else: + has_bumped = True + return has_bumped + + def sampleRobotInitalPosition(self): + """ + + :return: Sample a random initial position for the robot within the grid. + """ + random_init_x = np.random.random_sample() * (INIT_MAX_X - INIT_MIN_X) + INIT_MIN_X + random_init_y = np.random.random_sample() * (INIT_MAX_Y - INIT_MIN_Y) + INIT_MIN_Y + return [random_init_x, random_init_y] + + def resetEpisode(self): + """ + Give the correct sequence of commands to the robot + to reset the environment between the different episodes + """ + if self.second_cam_topic is not None: + assert NotImplementedError + # Env reset + random_init_position = self.sampleRobotInitalPosition() + self.robot.setRobotCmd(random_init_position[0], random_init_position[1], 0) + + def processMsg(self, msg): + """ + Use this step's message to determine the position the robot should reach at the next step, + and the reward obtained at this step. + This function also takes care of the environment's reset.
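+ Expected keys: 'command' in {'reset', 'action', 'exit'}; when command == 'action', the message also carries 'is_discrete' (bool) and 'action' (a Move enum value for discrete control, or a [dx, dy] pair for continuous control).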
+ :param msg: (dict) + """ + command = msg.get('command', '') + if command == 'reset': + action = None + self.episode_idx += 1 + self.resetEpisode() + + elif command == 'action': + if msg.get('is_discrete', False): + action = Move(msg['action']) + else: + action = 'Continuous' + + elif command == "exit": + print("recive exit signal, quit...") + exit(0) + else: + raise ValueError("Unknown command: {}".format(msg)) + + has_bumped = False + # We are always facing North + if action == Move.FORWARD: + has_bumped = self.forwardAction() + elif action == Move.STOP: + pass + elif action == Move.RIGHT: + has_bumped = self.rightAction() + elif action == Move.LEFT: + has_bumped = self.leftAction() + elif action == Move.BACKWARD: + has_bumped = self.backwardAction() + elif action == 'Continuous': + has_bumped = self.moveContinousAction(msg) + elif action == None: + pass + else: + print("Unsupported action: ", action) + + # Determinate the reward for this step + + # Consider that we reached the target if we are close enough + # we detect that computing the difference in area between TARGET_INITIAL_AREA + # current detected area of the target + if np.linalg.norm(np.array(self.robot.robot_pos) - np.array(self.robot.target_pos)) \ + < DIST_TO_TARGET_THRESHOLD: + self.reward = REWARD_TARGET_REACH + elif has_bumped: + self.reward = REWARD_BUMP_WALL + else: + self.reward = REWARD_NOTHING diff --git a/real_robots/omnirobot_utils/red_target_margin4_pixel_480x480.png b/real_robots/omnirobot_utils/red_target_margin4_pixel_480x480.png new file mode 100644 index 000000000..8d538e5c3 Binary files /dev/null and b/real_robots/omnirobot_utils/red_target_margin4_pixel_480x480.png differ diff --git a/real_robots/omnirobot_utils/robot_margin3_pixel_only_tag.png b/real_robots/omnirobot_utils/robot_margin3_pixel_only_tag.png new file mode 100644 index 000000000..32ab97020 Binary files /dev/null and b/real_robots/omnirobot_utils/robot_margin3_pixel_only_tag.png differ diff --git a/real_robots/omnirobot_utils/robot_margin4_pixel.png b/real_robots/omnirobot_utils/robot_margin4_pixel.png new file mode 100644 index 000000000..2266cdec2 Binary files /dev/null and b/real_robots/omnirobot_utils/robot_margin4_pixel.png differ diff --git a/real_robots/omnirobot_utils/target_margin4_pixel.png b/real_robots/omnirobot_utils/target_margin4_pixel.png new file mode 100644 index 000000000..278e50403 Binary files /dev/null and b/real_robots/omnirobot_utils/target_margin4_pixel.png differ diff --git a/real_robots/omnirobot_utils/utils.py b/real_robots/omnirobot_utils/utils.py new file mode 100644 index 000000000..e015cda7a --- /dev/null +++ b/real_robots/omnirobot_utils/utils.py @@ -0,0 +1,135 @@ +from __future__ import division, print_function, absolute_import +from gym import spaces +from gym import logger +import numpy as np +import cv2 + + +class PosTransformer(object): + def __init__(self, camera_mat: np.ndarray, dist_coeffs: np.ndarray, + pos_camera_coord_ground: np.ndarray, rot_mat_camera_coord_ground: np.ndarray): + """ + Transform the position among physical position in camera coordinate, + physical position in ground coordinate, + pixel position of image + """ + super(PosTransformer, self).__init__() + self.camera_mat = camera_mat + + self.dist_coeffs = dist_coeffs + + self.camera_2_ground_trans = np.zeros((4, 4), np.float) + self.camera_2_ground_trans[0:3, 0:3] = rot_mat_camera_coord_ground + self.camera_2_ground_trans[0:3, 3] = pos_camera_coord_ground + self.camera_2_ground_trans[3, 3] = 1.0 + + self.ground_2_camera_trans = 
+
+    def phyPosCam2PhyPosGround(self, pos_coord_cam):
+        """
+        Transform a physical position in the camera coordinate frame to the ground coordinate frame
+        """
+        assert pos_coord_cam.shape == (3, 1)
+        homo_pos = np.ones((4, 1), np.float32)
+        homo_pos[0:3, :] = pos_coord_cam
+        return (np.matmul(self.camera_2_ground_trans, homo_pos))[0:3, :]
+
+    def phyPosGround2PixelPos(self, pos_coord_ground, return_distort_image_pos=False):
+        """
+        Transform a physical position in the ground coordinate frame to a pixel position in the image
+        """
+        pos_coord_ground = np.array(pos_coord_ground)
+        if len(pos_coord_ground.shape) == 1:
+            pos_coord_ground = pos_coord_ground.reshape(-1, 1)
+
+        assert pos_coord_ground.shape == (
+            3, 1) or pos_coord_ground.shape == (2, 1)
+
+        homo_pos = np.ones((4, 1), np.float32)
+        if pos_coord_ground.shape == (2, 1):
+            # by default, z = 0 since the robot is on the ground
+            homo_pos[0:2, :] = pos_coord_ground
+
+            # (np.random.randn() - 0.5) * 0.05 # add noise to the z-axis
+            homo_pos[2, :] = 0
+        else:
+            homo_pos[0:3, :] = pos_coord_ground
+        homo_pos = np.matmul(self.ground_2_camera_trans, homo_pos)
+
+        pixel_points, _ = cv2.projectPoints(homo_pos[0:3, :].reshape(1, 1, 3), np.zeros((3, 1)), np.zeros((3, 1)),
+                                            self.camera_mat, self.dist_coeffs if return_distort_image_pos else None)
+        return pixel_points.reshape((2, 1))
+
+
+class RingBox(spaces.Box):
+    """
+    A ring box in R^n, i.e. each coordinate is bounded,
+    with a minimum (absolute value) constraint on every coordinate
+    """
+
+    def __init__(self, positive_low=None, positive_high=None, negative_low=None, negative_high=None, shape=None,
+                 dtype=None):
+        """
+        For each coordinate,
+        the value will be sampled from [positive_low, positive_high] or [negative_low, negative_high]
+        """
+        super(RingBox, self).__init__(low=negative_low, high=positive_high, shape=shape, dtype=dtype)
+
+        if shape is None:
+            assert positive_low.shape == positive_high.shape == negative_low.shape == negative_high.shape
+            shape = positive_low.shape
+        else:
+            assert np.isscalar(positive_low) and np.isscalar(
+                positive_high) and np.isscalar(negative_low) and np.isscalar(negative_high)
+            positive_low = positive_low + np.zeros(shape)
+            positive_high = positive_high + np.zeros(shape)
+            negative_low = negative_low + np.zeros(shape)
+            negative_high = negative_high + np.zeros(shape)
+
+        if dtype is None:  # Autodetect type
+            if (positive_high == 255).all():
+                dtype = np.uint8
+            else:
+                dtype = np.float32
+            logger.warn(
+                "Ring Box autodetected dtype as {}. Please provide explicit dtype.".format(dtype))
+        self.positive_low = positive_low.astype(dtype)
+        self.positive_high = positive_high.astype(dtype)
+        self.negative_low = negative_low.astype(dtype)
+        self.negative_high = negative_high.astype(dtype)
+        self.length_positive = self.positive_high - self.positive_low
+        self.length_negative = self.negative_high - self.negative_low
+        self.np_random = np.random.RandomState()
+
+    def seed(self, seed):
+        self.np_random.seed(seed)
+
+    def sample(self):
+        length_positive = self.length_positive if self.dtype.kind == 'f' else self.length_positive.astype('int64') + 1
+        origin_sample = self.np_random.uniform(
+            low=-self.length_negative, high=length_positive, size=self.negative_high.shape).astype(self.dtype)
+        origin_sample = origin_sample + self.positive_low * \
+            (origin_sample >= 0) + self.negative_high * (origin_sample <= 0)
+        return origin_sample
+
+    def contains(self, x):
+        return x.shape == self.shape and np.logical_or(np.logical_and(x >= self.positive_low, x <= self.positive_high),
+                                                       np.logical_and(x <= self.negative_high,
+                                                                      x >= self.negative_low)).all()
+
+    def to_jsonable(self, sample_n):
+        return np.array(sample_n).tolist()
+
+    def from_jsonable(self, sample_n):
+        return [np.asarray(sample) for sample in sample_n]
+
+    def __repr__(self):
+        return "RingBox" + str(self.shape)
+
+    def __eq__(self, other):
+        return np.allclose(self.positive_low, other.positive_low) and \
+            np.allclose(self.positive_high, other.positive_high) and \
+            np.allclose(self.negative_low, other.negative_low) and \
+            np.allclose(self.negative_high, other.negative_high)
diff --git a/real_robots/real_baxter_debug.py b/real_robots/real_baxter_debug.py
index 066de3eec..b8da58355 100644
--- a/real_robots/real_baxter_debug.py
+++ b/real_robots/real_baxter_debug.py
@@ -63,8 +63,6 @@ def resetPose():
             arm_pos = baxter_utils.get_ee_position(arm)
             if name == "left_arm":
                 ee_orientation = baxter_utils.get_ee_orientation(left_arm)
-                # print(ee_orientation)
-            # print(name, arm_pos)
             print(np.linalg.norm(BUTTON_POS - arm_pos, 2))
             rospy.sleep(0.5)
     except KeyboardInterrupt:
diff --git a/replay/compare_plots.py b/replay/compare_plots.py
index f7445837e..8465e5d80 100644
--- a/replay/compare_plots.py
+++ b/replay/compare_plots.py
@@ -5,6 +5,7 @@
 import numpy as np
 import seaborn as sns
 from matplotlib.ticker import FuncFormatter
+from matplotlib import rc
 
 from replay.aggregate_plots import lightcolors, darkcolors, Y_LIM_SHAPED_REWARD, Y_LIM_SPARSE_REWARD, millions
 from srl_zoo.utils import printGreen, printRed
@@ -12,8 +13,8 @@
 # Init seaborn
 sns.set()
 # Style for the title
-fontstyle = {'fontname': 'DejaVu Sans', 'fontsize': 16}
-
+fontstyle = {'fontname': 'DejaVu Sans', 'fontsize': 22, 'fontweight': 'bold'}
+rc('font', weight='bold')
 
 def comparePlots(path, plots, y_limits, title="Learning Curve",
                  timesteps=False, truncate_x=-1, no_display=False):
@@ -63,16 +64,16 @@ def comparePlots(path, plots, y_limits, title="Learning Curve",
     if timesteps:
         formatter = FuncFormatter(millions)
-        plt.xlabel('Number of Timesteps')
+        plt.xlabel('Number of Timesteps', fontsize=20, fontweight='bold')
         fig.axes[0].xaxis.set_major_formatter(formatter)
     else:
         plt.xlabel('Number of Episodes')
 
-    plt.ylabel('Rewards')
+    plt.ylabel('Rewards', fontsize=20, fontweight='bold')
 
     plt.title(title, **fontstyle)
     plt.ylim(y_limits)
 
-    plt.legend(framealpha=0.8, frameon=True, labelspacing=0.01, loc='lower right', fontsize=16)
+    plt.legend(framealpha=0.8, frameon=True, labelspacing=0.01, loc='lower right', fontsize=18)
 
     if not no_display:
         plt.show()
diff --git a/replay/enjoy_baselines.py b/replay/enjoy_baselines.py
index 05a50d16c..2405e31dd 100644
--- a/replay/enjoy_baselines.py
+++ b/replay/enjoy_baselines.py
@@ -9,7 +9,7 @@
 import yaml
 import numpy as np
 import tensorflow as tf
-from baselines.common import set_global_seeds
+from stable_baselines.common import set_global_seeds
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D
 from sklearn.decomposition import PCA
diff --git a/rl_baselines/base_classes.py b/rl_baselines/base_classes.py
index e6388ed9c..9e417c053 100644
--- a/rl_baselines/base_classes.py
+++ b/rl_baselines/base_classes.py
@@ -116,6 +116,7 @@ def __init__(self, name, model_class):
         self.ob_space = None
         self.ac_space = None
         self.policy = None
+        self.load_rl_model_path = None
 
     def save(self, save_path, _locals=None):
         """
@@ -137,6 +138,14 @@ def save(self, save_path, _locals=None):
         with open(save_path, "wb") as f:
             pickle.dump(save_param, f)
 
+    def setLoadPath(self, load_path):
+        """
+        Set the path used later to load the parameters of a trained RL model
+        :param load_path: (str)
+        :return: None
+        """
+        self.load_rl_model_path = load_path
+
     @classmethod
     def load(cls, load_path, args=None):
         """
@@ -229,13 +238,16 @@ def train(self, args, callback, env_kwargs=None, train_kwargs=None):
         self.ob_space = envs.observation_space
         self.ac_space = envs.action_space
 
-        policy_fn = {'cnn': CnnPolicy,
-                     'cnn-lstm': CnnLstmPolicy,
-                     'cnn-lnlstm': CnnLnLstmPolicy,
-                     'mlp': MlpPolicy,
-                     'lstm': MlpLstmPolicy,
-                     'lnlstm': MlpLnLstmPolicy}[args.policy]
-
-        self.model = self.model_class(policy_fn, envs, **train_kwargs)
+        policy_fn = {'cnn': "CnnPolicy",
+                     'cnn-lstm': "CnnLstmPolicy",
+                     'cnn-lnlstm': "CnnLnLstmPolicy",
+                     'mlp': "MlpPolicy",
+                     'lstm': "MlpLstmPolicy",
+                     'lnlstm': "MlpLnLstmPolicy"}[args.policy]
+        if self.load_rl_model_path is not None:
+            print("Loading trained model from: ", self.load_rl_model_path)
+            self.model = self.model_class.load(self.load_rl_model_path, envs, **train_kwargs)
+        else:
+            self.model = self.model_class(policy_fn, envs, **train_kwargs)
 
         self.model.learn(total_timesteps=args.num_timesteps, seed=args.seed, callback=callback)
         envs.close()
diff --git a/rl_baselines/train.py b/rl_baselines/train.py
index fa910c85f..b4239d2bc 100644
--- a/rl_baselines/train.py
+++ b/rl_baselines/train.py
@@ -205,7 +205,9 @@ def main():
                         help="Min number of episodes before saving best model")
     parser.add_argument('--latest', action='store_true', default=False,
                         help='load the latest learned model (location:srl_zoo/logs/DatasetName/)')
-
+    parser.add_argument('--load-rl-model-path', type=str, default=None,
+                        help="load a trained RL model (it must use the same algorithm type)")
+
     # Ignore unknown args for now
     args, unknown = parser.parse_known_args()
     env_kwargs = {}
@@ -246,6 +248,7 @@ def main():
     algo_class, algo_type, action_type = registered_rl[args.algo]
     algo = algo_class()
     ALGO = algo
+
     # if callback frequency needs to be changed
     LOG_INTERVAL = algo.LOG_INTERVAL
@@ -317,7 +320,16 @@ def main():
     # Get the hyperparameter, if given (Hyperband)
     hyperparams = {param.split(":")[0]: param.split(":")[1] for param in args.hyperparam}
     hyperparams = algo.parserHyperParam(hyperparams)
+
+    if args.load_rl_model_path is not None:
+        # use a small learning rate when fine-tuning a loaded model
+        print("Using a small learning rate: {:f}".format(1.0e-4))
+        hyperparams["learning_rate"] = lambda f: f * 1.0e-4
+
     # Train the agent
+
+    if args.load_rl_model_path is not None:
+        algo.setLoadPath(args.load_rl_model_path)
     algo.train(args, callback,
                env_kwargs=env_kwargs, train_kwargs=hyperparams)
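+    # Illustrative CLI usage of the new --load-rl-model-path flag
+    # (the model path below is a hypothetical example):
+    #   python -m rl_baselines.train --algo ppo2 --env OmnirobotEnv-v0 --srl-model ground_truth \
+    #       --load-rl-model-path logs/OmnirobotEnv-v0/ppo2/model.pkl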
diff --git a/state_representation/episode_saver.py b/state_representation/episode_saver.py
index 83f7469c9..842cd3582 100644
--- a/state_representation/episode_saver.py
+++ b/state_representation/episode_saver.py
@@ -25,7 +25,7 @@ class EpisodeSaver(object):
     """
 
     def __init__(self, name, max_dist, state_dim=-1, globals_=None, learn_every=3, learn_states=False,
-                 path='srl_zoo/data/', relative_pos=False):
+                 path='data/', relative_pos=False):
         super(EpisodeSaver, self).__init__()
         self.name = name
         self.data_folder = path + name
@@ -72,8 +72,8 @@ def saveImage(self, observation):
         :param observation: (numpy matrix) BGR image
         """
         image_path = "{}/{}/frame{:06d}".format(self.data_folder, self.episode_folder, self.episode_step)
-        self.images_path.append(image_path)
-
+        relative_path = "{}/{}/frame{:06d}".format(self.name, self.episode_folder, self.episode_step)
+        self.images_path.append(relative_path)
         # in the case of dual/multi-camera
         if observation.shape[2] > 3:
             observation1 = cv2.cvtColor(observation[:, :, :3], cv2.COLOR_BGR2RGB)
@@ -92,7 +92,6 @@ def reset(self, observation, target_pos, ground_truth):
         :param target_pos: (numpy array)
         :param ground_truth: (numpy array)
         """
-        # only reset if the array is empty, or the a reset has not already occured
         if len(self.episode_starts) == 0 or self.episode_starts[-1] is False:
             self.episode_idx += 1
@@ -120,6 +119,7 @@ def step(self, observation, action, reward, done, ground_truth_state):
         :param done: (bool) whether the episode is done or not
         :param ground_truth_state: (numpy array)
         """
+
         self.episode_step += 1
         self.n_steps += 1
         self.rewards.append(reward)
@@ -131,10 +131,10 @@ def step(self, observation, action, reward, done, ground_truth_state):
             self.episode_starts.append(False)
             self.ground_truth_states.append(ground_truth_state)
             self.saveImage(observation)
-        else:
+        else:  # Save the gathered data at the end of each episode
             self.save()
-
+
     def save(self):
         """
         Write data and ground truth to disk
diff --git a/tests/test_dataset_manipulation.py b/tests/test_dataset_manipulation.py
new file mode 100644
index 000000000..22ccc0be0
--- /dev/null
+++ b/tests/test_dataset_manipulation.py
@@ -0,0 +1,43 @@
+import subprocess
+import pytest
+import os
+import shutil
+
+DATA_FOLDER_NAME_1 = "kuka_test_f1"
+DATA_FOLDER_NAME_2 = "kuka_test_f2"
+DATA_FOLDER_NAME_3 = "kuka_test_f3"
+DEFAULT_ENV = "KukaButtonGymEnv-v0"
+PATH_SRL = "srl_zoo/data/"
+
+
+def assertEq(left, right):
+    assert left == right, "{} != {}".format(left, right)
+
+
+@pytest.mark.fast
+def testDataGenForFusion():
+    args_1 = ['--num-cpu', 4, '--num-episode', 8, '--name', DATA_FOLDER_NAME_1, '--force', '--env', DEFAULT_ENV]
+    args_1 = list(map(str, args_1))
+
+    ok = subprocess.call(['python', '-m', 'environments.dataset_generator'] + args_1)
+    assertEq(ok, 0)
+
+    args_2 = ['--num-cpu', 4, '--num-episode', 8, '--name', DATA_FOLDER_NAME_2, '--force', '--env', DEFAULT_ENV]
+    args_2 = list(map(str, args_2))
+
+    ok = subprocess.call(['python', '-m', 'environments.dataset_generator'] + args_2)
+    assertEq(ok, 0)
+
+    args_3 = ['--merge', PATH_SRL + DATA_FOLDER_NAME_1, PATH_SRL + DATA_FOLDER_NAME_2, PATH_SRL + DATA_FOLDER_NAME_3]
+    args_3 = list(map(str, args_3))
+
+    ok = subprocess.call(['python', '-m', 'environments.dataset_fusioner'] + args_3)
+    assertEq(ok, 0)
+
+    # Check that the original datasets no longer exist once they have been merged
+    assert not os.path.isdir(PATH_SRL + DATA_FOLDER_NAME_1)
+    assert not os.path.isdir(PATH_SRL + DATA_FOLDER_NAME_2)
+    assert os.path.isdir(PATH_SRL + DATA_FOLDER_NAME_3)
+
+    # Remove the merged test dataset
+    shutil.rmtree(PATH_SRL + DATA_FOLDER_NAME_3)
diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py
index 4d27ad6f2..01122a023 100644
--- a/tests/test_end_to_end.py
+++ b/tests/test_end_to_end.py
@@ -191,9 +191,11 @@ def testAllSrlonRLTrain(algo):
     args = ['--algo', algo, '--env', DEFAULT_ENV, '--srl-model', DEFAULT_SRL, '--num-timesteps', NUM_TIMESTEP,
             '--seed', SEED, '--num-iteration', NUM_ITERATION, '--no-vis',
             '--srl-config-file', DEFAULT_SRL_CONFIG_YAML]
-    if algo == "ddpg":
+    if algo == "ddpg" or algo == "sac":
         mem_limit = 100 if DEFAULT_SRL == 'raw_pixels' else 100000
-        args.extend(['-c', '--memory-limit', mem_limit])
+        args.extend(['-c'])
+        if algo == "ddpg":
+            args.extend(['--memory-limit', mem_limit])
     elif algo == "acer":
         args.extend(['--num-stack', 4])
diff --git a/tests/test_enjoy.py b/tests/test_enjoy.py
index dc7806083..1921dc69c 100644
--- a/tests/test_enjoy.py
+++ b/tests/test_enjoy.py
@@ -44,7 +44,8 @@ def assertNeq(left, right):
 
 
 @pytest.mark.fast
-@pytest.mark.parametrize("algo", ['a2c', 'acer', 'acktr', 'ars', 'cma-es', 'ddpg', 'deepq', 'ppo1', 'ppo2', 'sac', 'trpo'])
+@pytest.mark.parametrize("algo", ['a2c', 'acer', 'acktr', 'ars', 'cma-es', 'ddpg', 'deepq', 'ppo1', 'ppo2', 'sac',
+                                  'trpo'])
 def testBaselineTrain(algo):
     """
     test for the given RL algorithm
@@ -53,8 +54,10 @@ def testBaselineTrain(algo):
     args = ['--algo', algo, '--srl-model', DEFAULT_SRL, '--num-timesteps', NUM_TRAIN_TIMESTEP, '--seed', SEED,
             '--num-iteration', NUM_ITERATION, '--no-vis', '--log-dir', DEFAULT_LOG_DIR, '--env', DEFAULT_ENV,
             '--min-episodes-save', 1]
-    if algo == "ddpg":
-        args.extend(['-c', '--memory-limit', 100])
+    if algo == "ddpg" or algo == "sac":
+        args.extend(['-c'])
+        if algo == "ddpg":
+            args.extend(['--memory-limit', 100])
     elif algo == "acer":
         args.extend(['--num-stack', 4])
 
@@ -68,7 +71,8 @@ def testBaselineTrain(algo):
 
 
 @pytest.mark.slow
-@pytest.mark.parametrize("algo", ['a2c', 'acer', 'acktr', 'ars', 'cma-es', 'ddpg', 'deepq', 'ppo1', 'ppo2', 'sac', 'trpo'])
+@pytest.mark.parametrize("algo", ['a2c', 'acer', 'acktr', 'ars', 'cma-es', 'ddpg', 'deepq', 'ppo1', 'ppo2', 'sac',
+                                  'trpo'])
 def testEnjoyBaselines(algo):
     """
     test the enjoy script for the given RL algorithm
diff --git a/tests/test_hyperparam_search.py b/tests/test_hyperparam_search.py
index 0e052de09..95afc8962 100644
--- a/tests/test_hyperparam_search.py
+++ b/tests/test_hyperparam_search.py
@@ -47,8 +47,10 @@ def testRLHyperparamSearch(algo):
     """
     args = ['--optimizer', DEFAULT_OPTIMIZER, '--algo', algo, '--srl-model', DEFAULT_SRL, '--max-eval', MAX_EVAL,
             '--num-timesteps', NUM_TIMESTEP, '--seed', SEED, '--env', DEFAULT_ENV]
-    if algo == "ddpg":
-        args.extend(['-c', '--memory-limit', 100000])
+    if algo == "ddpg" or algo == "sac":
+        args.extend(['-c'])
+        if algo == "ddpg":
+            args.extend(['--memory-limit', 100000])
     elif algo == "acer":
         args.extend(['--num-stack', 4])
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index a6b9e9d6a..73d09939d 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -34,7 +34,8 @@ def assertNeq(left, right):
 
 
 # ignoring 'acktr', as it will run out of memory and crash tensorflow's allocation
-@pytest.mark.parametrize("algo", ['a2c', 'acer', 'ars', 'cma-es', 'ddpg', 'deepq', 'ppo1', 'ppo2', 'random_agent', 'sac', 'trpo'])
+@pytest.mark.parametrize("algo", ['a2c', 'acer', 'ars', 'cma-es', 'ddpg', 'deepq', 'ppo1', 'ppo2', 'random_agent',
+                                  'sac', 'trpo'])
@pytest.mark.parametrize("model_type", ['raw_pixels']) def testBaselineTrain(algo, model_type): """ @@ -44,10 +45,12 @@ def testBaselineTrain(algo, model_type): """ args = ['--algo', algo, '--srl-model', model_type, '--num-timesteps', NUM_TIMESTEP, '--seed', SEED, '--num-iteration', NUM_ITERATION, '--no-vis', '--env', DEFAULT_ENV] - if algo == "ddpg": + if algo == "ddpg" or algo == "sac": # Prevent RAM issue because of the replay buffer mem_limit = 100 if model_type == 'raw_pixels' else 100000 - args.extend(['-c', '--memory-limit', mem_limit]) + args.extend(['-c']) + if algo == "ddpg": + args.extend(['--memory-limit', mem_limit]) elif algo == "acer": args.extend(['--num-stack', 4]) @@ -91,7 +94,8 @@ def testEnvSRLTrain(model_type, env): @pytest.mark.fast @pytest.mark.parametrize("env", ["KukaRandButtonGymEnv-v0", "Kuka2ButtonGymEnv-v0", "KukaMovingButtonGymEnv-v0", - "MobileRobot2TargetGymEnv-v0", "MobileRobot1DGymEnv-v0", "MobileRobotLineTargetGymEnv-v0"]) + "MobileRobot2TargetGymEnv-v0", "MobileRobot1DGymEnv-v0", + "MobileRobotLineTargetGymEnv-v0", "OmnirobotEnv-v0"]) def testEnvTrain(env): """ test the environment on the RL pipeline @@ -108,7 +112,8 @@ def testEnvTrain(env): @pytest.mark.fast -@pytest.mark.parametrize("env", ["KukaButtonGymEnv-v0", "MobileRobotGymEnv-v0", "CarRacingGymEnv-v0"]) +@pytest.mark.parametrize("env", ["KukaButtonGymEnv-v0", "MobileRobotGymEnv-v0", "CarRacingGymEnv-v0", + "OmnirobotEnv-v0"]) @pytest.mark.parametrize("algo", ['a2c', 'ppo1', 'ppo2', 'sac', 'trpo']) def testContinousEnvTrain(env, algo): """