diff --git a/droid/camera_utils/camera_readers/zed_camera.py b/droid/camera_utils/camera_readers/zed_camera.py
index ba10ec7..1e25988 100644
--- a/droid/camera_utils/camera_readers/zed_camera.py
+++ b/droid/camera_utils/camera_readers/zed_camera.py
@@ -131,6 +131,7 @@ def _configure_camera(self, init_params):
         self._current_params = init_params
         sl_params = sl.InitParameters(**init_params)
         sl_params.set_from_serial_number(int(self.serial_number))
+        sl_params.camera_image_flip = sl.FLIP_MODE.OFF
         status = self._cam.open(sl_params)
         if status != sl.ERROR_CODE.SUCCESS:
             raise RuntimeError("Camera Failed To Open")
diff --git a/droid/data_processing/timestep_processing.py b/droid/data_processing/timestep_processing.py
index 2ef7460..63297fa 100644
--- a/droid/data_processing/timestep_processing.py
+++ b/droid/data_processing/timestep_processing.py
@@ -13,6 +13,7 @@ def __init__(
         self,
         ignore_action=False,
         action_space="cartesian_velocity",
+        gripper_action_space=None,
         robot_state_keys=["cartesian_position", "gripper_position", "joint_positions", "joint_velocities"],
         camera_extrinsics=["hand_camera", "varied_camera", "fixed_camera"],
         state_dtype=np.float32,
@@ -22,7 +23,10 @@ def __init__(

         assert action_space in ["cartesian_position", "joint_position", "cartesian_velocity", "joint_velocity"]
         self.action_space = action_space
-        self.gripper_key = "gripper_velocity" if "velocity" in action_space else "gripper_position"
+        # fall back to the arm action space when no gripper action space is given
+        if gripper_action_space is None:
+            gripper_action_space = "velocity" if "velocity" in action_space else "position"
+        self.gripper_key = "gripper_velocity" if "velocity" in gripper_action_space else "gripper_position"
         self.ignore_action = ignore_action

         self.robot_state_keys = robot_state_keys
@@ -68,6 +72,26 @@ def forward(self, timestep):
         if len(extrinsics_state):
             extrinsics_state = np.concatenate(extrinsics_state)

+        ### Get Intrinsics ###
+        cam_intrinsics_obs = timestep["observation"]["camera_intrinsics"]
+        intrinsics_dict = defaultdict(list)
+
+        for serial_number in sorted_camera_ids:
+            cam_type = camera_type_dict[serial_number]
+            if cam_type not in self.camera_extrinsics:
+                continue
+
+            full_cam_ids = sorted(cam_intrinsics_obs.keys())
+            for full_cam_id in full_cam_ids:
+                if serial_number in full_cam_id:
+                    intr = cam_intrinsics_obs[full_cam_id]
+                    intrinsics_dict[cam_type].append(intr)
+
+        sorted_intrinsics_keys = sorted(intrinsics_dict.keys())
+        intrinsics_state = [np.array(intrinsics_dict[cam_type]).flatten() for cam_type in sorted_intrinsics_keys]
+        if len(intrinsics_state):
+            intrinsics_state = np.concatenate(intrinsics_state)
+
         ### Get High Dimensional State Info ###
         high_dim_state_dict = defaultdict(lambda: defaultdict(list))
@@ -84,7 +108,7 @@ def forward(self, timestep):
             high_dim_state_dict[obs_type][cam_type].append(data)

         ### Finish Observation Portion ###
-        low_level_state = np.concatenate([robot_state, extrinsics_state], dtype=self.state_dtype)
+        low_level_state = np.concatenate([robot_state, extrinsics_state, intrinsics_state], dtype=self.state_dtype)
         processed_timestep = {"observation": {"state": low_level_state, "camera": high_dim_state_dict}}

         self.image_transformer.forward(processed_timestep)
@@ -95,4 +119,8 @@ def forward(self, timestep):
             action = np.concatenate([arm_action, [gripper_action]], dtype=self.action_dtype)
             processed_timestep["action"] = action

+        # return raw information + meta data
+        processed_timestep["extrinsics_dict"] = extrinsics_dict
+        processed_timestep["intrinsics_dict"] = intrinsics_dict
+
         return processed_timestep
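For reference, the intrinsics handling added to TimestepProcesser.forward above boils down to the following standalone sketch (toy 3x3 camera matrices; the shapes and values are illustrative only):

import numpy as np
from collections import defaultdict

# two cameras of one type, each contributing a 3x3 camera matrix
intrinsics_dict = defaultdict(list)
intrinsics_dict["varied_camera"].append(np.eye(3) * 600.0)
intrinsics_dict["varied_camera"].append(np.eye(3) * 610.0)

# sort by camera type, flatten each matrix, and concatenate: 2 * 9 = 18 values
sorted_keys = sorted(intrinsics_dict.keys())
intrinsics_state = np.concatenate([np.array(intrinsics_dict[k]).flatten() for k in sorted_keys])
assert intrinsics_state.shape == (18,)  # appended to low_level_state alongside extrinsics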
diff --git a/droid/evaluation/eval_launcher_robomimic.py b/droid/evaluation/eval_launcher_robomimic.py
new file mode 100644
index 0000000..5e4eb4a
--- /dev/null
+++ b/droid/evaluation/eval_launcher_robomimic.py
@@ -0,0 +1,238 @@
+import json
+import os
+import numpy as np
+import torch
+from collections import OrderedDict
+from copy import deepcopy
+
+from droid.controllers.oculus_controller import VRPolicy
+from droid.evaluation.policy_wrapper import PolicyWrapperRobomimic
+from droid.robot_env import RobotEnv
+from droid.user_interface.data_collector import DataCollecter
+from droid.user_interface.gui import RobotGUI
+
+import robomimic.utils.file_utils as FileUtils
+import robomimic.utils.torch_utils as TorchUtils
+import robomimic.utils.tensor_utils as TensorUtils
+
+import cv2
+
+
+def eval_launcher(variant, run_id, exp_id):
+    # Get Directory #
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+
+    # Prepare Log Directory #
+    variant["exp_name"] = os.path.join(variant["exp_name"], "run{0}/id{1}/".format(run_id, exp_id))
+    log_dir = os.path.join(dir_path, "../../evaluation_logs", variant["exp_name"])
+
+    # Set Random Seeds #
+    torch.manual_seed(variant["seed"])
+    np.random.seed(variant["seed"])
+
+    # Set Compute Mode #
+    use_gpu = variant.get("use_gpu", False)
+    torch.device("cuda:0" if use_gpu else "cpu")
+
+    ckpt_path = variant["ckpt_path"]
+
+    device = TorchUtils.get_torch_device(try_to_use_cuda=True)
+    ckpt_dict = FileUtils.maybe_dict_from_checkpoint(ckpt_path=ckpt_path)
+    config = json.loads(ckpt_dict["config"])
+
+    ### infer image size ###
+    for obs_key in ckpt_dict["shape_metadata"]["all_shapes"].keys():
+        if 'camera/image' in obs_key:
+            imsize = max(ckpt_dict["shape_metadata"]["all_shapes"][obs_key])
+            break
+
+    ckpt_dict["config"] = json.dumps(config)
+    policy, _ = FileUtils.policy_from_checkpoint(ckpt_dict=ckpt_dict, device=device, verbose=True)
+    policy.goal_mode = config["train"]["goal_mode"]
+    policy.eval_mode = True
+
+    # determine the action space (relative or absolute)
+    action_keys = config["train"]["action_keys"]
+    if "action/rel_pos" in action_keys:
+        action_space = "cartesian_velocity"
+        for k in action_keys:
+            assert not k.startswith("action/abs_")
+    elif "action/abs_pos" in action_keys:
+        action_space = "cartesian_position"
+        for k in action_keys:
+            assert not k.startswith("action/rel_")
+    else:
+        raise ValueError
+
+    # determine the action space for the gripper
+    if "action/gripper_velocity" in action_keys:
+        gripper_action_space = "velocity"
+    elif "action/gripper_position" in action_keys:
+        gripper_action_space = "position"
+    else:
+        raise ValueError
+
+    # Prepare Policy Wrapper #
+    data_processing_kwargs = dict(
+        timestep_filtering_kwargs=dict(
+            action_space=action_space,
+            gripper_action_space=gripper_action_space,
+            robot_state_keys=["cartesian_position", "gripper_position", "joint_positions"],
+            # camera_extrinsics=[],
+        ),
+        image_transform_kwargs=dict(
+            remove_alpha=True,
+            bgr_to_rgb=True,
+            to_tensor=True,
+            augment=False,
+        ),
+    )
+    timestep_filtering_kwargs = data_processing_kwargs.get("timestep_filtering_kwargs", {})
+    image_transform_kwargs = data_processing_kwargs.get("image_transform_kwargs", {})
+
+    policy_data_processing_kwargs = {}
+    policy_timestep_filtering_kwargs = policy_data_processing_kwargs.get("timestep_filtering_kwargs", {})
+    policy_image_transform_kwargs = policy_data_processing_kwargs.get("image_transform_kwargs", {})
+
+    policy_timestep_filtering_kwargs.update(timestep_filtering_kwargs)
+    policy_image_transform_kwargs.update(image_transform_kwargs)
+
+    fs = config["train"]["frame_stack"]
+
+    wrapped_policy = PolicyWrapperRobomimic(
+        policy=policy,
+        timestep_filtering_kwargs=policy_timestep_filtering_kwargs,
+        image_transform_kwargs=policy_image_transform_kwargs,
+        frame_stack=fs,
+        eval_mode=True,
+    )
+
+    camera_kwargs = dict(
+        hand_camera=dict(image=True, concatenate_images=False, resolution=(imsize, imsize), resize_func="cv2"),
+        varied_camera=dict(image=True, concatenate_images=False, resolution=(imsize, imsize), resize_func="cv2"),
+    )
+
+    policy_camera_kwargs = {}
+    policy_camera_kwargs.update(camera_kwargs)
+
+    env = RobotEnv(
+        action_space=policy_timestep_filtering_kwargs["action_space"],
+        gripper_action_space=policy_timestep_filtering_kwargs["gripper_action_space"],
+        camera_kwargs=policy_camera_kwargs
+    )
+    controller = VRPolicy()
+
+    # Launch GUI #
+    data_collector = DataCollecter(
+        env=env,
+        controller=controller,
+        policy=wrapped_policy,
+        save_traj_dir=log_dir,
+        save_data=variant.get("save_data", True),
+    )
+    RobotGUI(robot=data_collector)
+
+
+def get_goal_im(variant, run_id, exp_id):
+    # Get Directory #
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+
+    # Prepare Log Directory #
+    variant["exp_name"] = os.path.join(variant["exp_name"], "run{0}/id{1}/".format(run_id, exp_id))
+    log_dir = os.path.join(dir_path, "../../evaluation_logs", variant["exp_name"])
+
+    # Set Random Seeds #
+    torch.manual_seed(variant["seed"])
+    np.random.seed(variant["seed"])
+
+    # Set Compute Mode #
+    use_gpu = variant.get("use_gpu", False)
+    torch.device("cuda:0" if use_gpu else "cpu")
+
+    ckpt_path = variant["ckpt_path"]
+
+    device = TorchUtils.get_torch_device(try_to_use_cuda=True)
+    ckpt_dict = FileUtils.maybe_dict_from_checkpoint(ckpt_path=ckpt_path)
+    config = json.loads(ckpt_dict["config"])
+
+    ### infer image size ###
+    imsize = max(ckpt_dict["shape_metadata"]["all_shapes"]["camera/image/hand_camera_left_image"])
+
+    ckpt_dict["config"] = json.dumps(config)
+    policy, _ = FileUtils.policy_from_checkpoint(ckpt_dict=ckpt_dict, device=device, verbose=True)
+
+    # determine the action space (relative or absolute)
+    action_keys = config["train"]["action_keys"]
+    if "action/rel_pos" in action_keys:
+        action_space = "cartesian_velocity"
+        for k in action_keys:
+            assert not k.startswith("action/abs_")
+    elif "action/abs_pos" in action_keys:
+        action_space = "cartesian_position"
+        for k in action_keys:
+            assert not k.startswith("action/rel_")
+    else:
+        raise ValueError
+
+    # determine the action space for the gripper
+    if "action/gripper_velocity" in action_keys:
+        gripper_action_space = "velocity"
+    elif "action/gripper_position" in action_keys:
+        gripper_action_space = "position"
+    else:
+        raise ValueError
+
+    # Prepare Policy Wrapper #
+    data_processing_kwargs = dict(
+        timestep_filtering_kwargs=dict(
+            action_space=action_space,
+            gripper_action_space=gripper_action_space,
+            robot_state_keys=["cartesian_position", "gripper_position", "joint_positions"],
+            # camera_extrinsics=[],
+        ),
+        image_transform_kwargs=dict(
+            remove_alpha=True,
+            bgr_to_rgb=True,
+            to_tensor=True,
+            augment=False,
+        ),
+    )
+    timestep_filtering_kwargs = data_processing_kwargs.get("timestep_filtering_kwargs", {})
+    image_transform_kwargs = data_processing_kwargs.get("image_transform_kwargs", {})
+
+    policy_data_processing_kwargs = {}
+    policy_timestep_filtering_kwargs = policy_data_processing_kwargs.get("timestep_filtering_kwargs", {})
+    policy_image_transform_kwargs = policy_data_processing_kwargs.get("image_transform_kwargs", {})
+
+    policy_timestep_filtering_kwargs.update(timestep_filtering_kwargs)
+    policy_image_transform_kwargs.update(image_transform_kwargs)
+
+    wrapped_policy = PolicyWrapperRobomimic(
+        policy=policy,
+        timestep_filtering_kwargs=policy_timestep_filtering_kwargs,
+        image_transform_kwargs=policy_image_transform_kwargs,
+        frame_stack=config["train"]["frame_stack"],
+        eval_mode=True,
+    )
+
+    camera_kwargs = dict(
+        hand_camera=dict(image=True, concatenate_images=False, resolution=(imsize, imsize), resize_func="cv2"),
+        varied_camera=dict(image=True, concatenate_images=False, resolution=(imsize, imsize), resize_func="cv2"),
+    )
+
+    policy_camera_kwargs = {}
+    policy_camera_kwargs.update(camera_kwargs)
+
+    env = RobotEnv(
+        action_space=policy_timestep_filtering_kwargs["action_space"],
+        gripper_action_space=policy_timestep_filtering_kwargs["gripper_action_space"],
+        camera_kwargs=policy_camera_kwargs,
+        do_reset=False
+    )
+
+    ims = env.read_cameras()[0]["image"]
+    if not os.path.exists('eval_params'):
+        os.makedirs('eval_params')
+    for k in ims.keys():
+        image = ims[k]
+        cv2.imwrite(f'eval_params/{k}.png', image[:, :, :3])
+    return ims
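Note that the checkpoint-inspection block above was shared verbatim by eval_launcher and get_goal_im; it could be factored into a common helper along these lines (a sketch only; infer_action_spaces is a hypothetical name, not part of the repo):

def infer_action_spaces(action_keys):
    """Map robomimic action keys to (action_space, gripper_action_space)."""
    if "action/rel_pos" in action_keys:
        action_space = "cartesian_velocity"
        assert not any(k.startswith("action/abs_") for k in action_keys)
    elif "action/abs_pos" in action_keys:
        action_space = "cartesian_position"
        assert not any(k.startswith("action/rel_") for k in action_keys)
    else:
        raise ValueError("expected action/rel_pos or action/abs_pos in action_keys")

    if "action/gripper_velocity" in action_keys:
        gripper_action_space = "velocity"
    elif "action/gripper_position" in action_keys:
        gripper_action_space = "position"
    else:
        raise ValueError("expected a gripper action key in action_keys")

    return action_space, gripper_action_space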
diff --git a/droid/evaluation/policy_wrapper.py b/droid/evaluation/policy_wrapper.py
index d7bb94e..b63c04f 100644
--- a/droid/evaluation/policy_wrapper.py
+++ b/droid/evaluation/policy_wrapper.py
@@ -1,7 +1,10 @@
 import numpy as np
 import torch
+from collections import deque

 from droid.data_processing.timestep_processing import TimestepProcesser
+import robomimic.utils.torch_utils as TorchUtils
+import robomimic.utils.tensor_utils as TensorUtils


 def converter_helper(data, batchify=True):
@@ -61,3 +64,138 @@ def forward(self, observation):
         # import pdb; pdb.set_trace()

         return np_action
+
+
+class PolicyWrapperRobomimic:
+    def __init__(self, policy, timestep_filtering_kwargs, image_transform_kwargs, frame_stack, eval_mode=True):
+        self.policy = policy
+
+        assert eval_mode is True
+
+        self.fs_wrapper = FrameStackWrapper(num_frames=frame_stack)
+        self.fs_wrapper.reset()
+        self.policy.start_episode()
+
+        self.timestep_processor = TimestepProcesser(
+            ignore_action=True, **timestep_filtering_kwargs, image_transform_kwargs=image_transform_kwargs
+        )
+
+    def convert_raw_extrinsics_to_Twc(self, raw_data):
+        """
+        Helper function that converts raw extrinsics (a 6-DoF pose) to a 4x4 world-to-camera transformation matrix.
+        """
+        raw_data = torch.from_numpy(np.array(raw_data))
+        pos = raw_data[0:3]
+        rot_mat = TorchUtils.euler_angles_to_matrix(raw_data[3:6], convention="XYZ")
+        extrinsics = np.zeros((4, 4))
+        extrinsics[:3, :3] = TensorUtils.to_numpy(rot_mat)
+        extrinsics[:3, 3] = TensorUtils.to_numpy(pos)
+        extrinsics[3, 3] = 1.0
+        # invert the matrix to match the standard definition of extrinsics: from world to cam
+        extrinsics = np.linalg.inv(extrinsics)
+        return extrinsics
+
+    def forward(self, observation):
+        timestep = {"observation": observation}
+        processed_timestep = self.timestep_processor.forward(timestep)
+
+        extrinsics_dict = processed_timestep["extrinsics_dict"]
+        intrinsics_dict = processed_timestep["intrinsics_dict"]
+
+        obs = {
+            "robot_state/cartesian_position": observation["robot_state"]["cartesian_position"],
+            "robot_state/gripper_position": [observation["robot_state"]["gripper_position"]],  # wrap as array, raw data is a single float
+            "camera/image/hand_camera_left_image": processed_timestep["observation"]["camera"]["image"]["hand_camera"][0],
+            "camera/image/hand_camera_right_image": processed_timestep["observation"]["camera"]["image"]["hand_camera"][1],
+            "camera/image/varied_camera_1_left_image": processed_timestep["observation"]["camera"]["image"]["varied_camera"][0],
+            "camera/image/varied_camera_1_right_image": processed_timestep["observation"]["camera"]["image"]["varied_camera"][1],
+            "camera/image/varied_camera_2_left_image": processed_timestep["observation"]["camera"]["image"]["varied_camera"][2],
+            "camera/image/varied_camera_2_right_image": processed_timestep["observation"]["camera"]["image"]["varied_camera"][3],
+
+            "camera/extrinsics/hand_camera_left": self.convert_raw_extrinsics_to_Twc(extrinsics_dict["hand_camera"][0]),
+            "camera/extrinsics/hand_camera_right": self.convert_raw_extrinsics_to_Twc(extrinsics_dict["hand_camera"][2]),
+            "camera/extrinsics/varied_camera_1_left": self.convert_raw_extrinsics_to_Twc(extrinsics_dict["varied_camera"][0]),
+            "camera/extrinsics/varied_camera_1_right": self.convert_raw_extrinsics_to_Twc(extrinsics_dict["varied_camera"][1]),
+            "camera/extrinsics/varied_camera_2_left": self.convert_raw_extrinsics_to_Twc(extrinsics_dict["varied_camera"][2]),
+            "camera/extrinsics/varied_camera_2_right": self.convert_raw_extrinsics_to_Twc(extrinsics_dict["varied_camera"][3]),
+
+            "camera/intrinsics/hand_camera_left": intrinsics_dict["hand_camera"][0],
+            "camera/intrinsics/hand_camera_right": intrinsics_dict["hand_camera"][1],
+            "camera/intrinsics/varied_camera_1_left": intrinsics_dict["varied_camera"][0],
+            "camera/intrinsics/varied_camera_1_right": intrinsics_dict["varied_camera"][1],
+            "camera/intrinsics/varied_camera_2_left": intrinsics_dict["varied_camera"][2],
+            "camera/intrinsics/varied_camera_2_right": intrinsics_dict["varied_camera"][3],
+        }
+
+        # set each item of obs as np.array
+        for k in obs:
+            obs[k] = np.array(obs[k])
+
+        self.fs_wrapper.add_obs(obs)
+        obs_history = self.fs_wrapper.get_obs_history()
+        action = self.policy(obs_history)
+
+        return action
+
+    def reset(self):
+        self.fs_wrapper.reset()
+        self.policy.start_episode()
+
+
+class FrameStackWrapper:
+    """
+    Utility for stacking observations during rollouts, so that the policy
+    receives a sequence of the last @num_frames observations (including the
+    current one) instead of a single observation.
+    """
+    def __init__(self, num_frames):
+        """
+        Args:
+            num_frames (int): number of past observations (including the current
+                observation) to stack together. If num_frames is 1, observations
+                are returned unstacked.
+        """
+        self.num_frames = num_frames
+
+        ### TODO: add action padding option + adding action to obs to include action history in obs ###
+
+        # keep track of last @num_frames observations for each obs key
+        self.obs_history = None
+
+    def _set_initial_obs_history(self, init_obs):
+        """
+        Helper method to initialize the observation history from the initial
+        observation, by repeating it. Populates @self.obs_history with a deque
+        per observation key; each entry gets an extra leading dimension of 1
+        (for easy concatenation later).
+        """
+        self.obs_history = {}
+        for k in init_obs:
+            self.obs_history[k] = deque(
+                [init_obs[k][None] for _ in range(self.num_frames)],
+                maxlen=self.num_frames,
+            )
+
+    def reset(self):
+        self.obs_history = None
+
+    def get_obs_history(self):
+        """
+        Helper method to convert the internal @self.obs_history into a stacked
+        observation where each key is a numpy array with leading dimension
+        @self.num_frames.
+        """
+        # concatenate all frames per key so we return a numpy array per key
+        if self.num_frames == 1:
+            return {k: np.concatenate(self.obs_history[k], axis=0)[0] for k in self.obs_history}
+        else:
+            return {k: np.concatenate(self.obs_history[k], axis=0) for k in self.obs_history}
+
+    def add_obs(self, obs):
+        if self.obs_history is None:
+            self._set_initial_obs_history(obs)
+
+        # update frame history
+        for k in obs:
+            # make sure to have leading dim of 1 for easy concatenation
+            self.obs_history[k].append(obs[k][None])
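A quick sanity check of FrameStackWrapper's behavior (toy observation shapes, assuming the class above is in scope): the first add_obs fills the history by repetition, so the stacked output immediately has num_frames rows.

import numpy as np

fs = FrameStackWrapper(num_frames=3)
fs.reset()
fs.add_obs({"robot_state/cartesian_position": np.zeros(6)})
stacked = fs.get_obs_history()
assert stacked["robot_state/cartesian_position"].shape == (3, 6)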
diff --git a/droid/franka/robot.py b/droid/franka/robot.py
index 5114ee6..27fe316 100644
--- a/droid/franka/robot.py
+++ b/droid/franka/robot.py
@@ -42,8 +42,8 @@ def kill_controller(self):
         self._robot_process.kill()
         self._gripper_process.kill()

-    def update_command(self, command, action_space="cartesian_velocity", blocking=False):
-        action_dict = self.create_action_dict(command, action_space=action_space)
+    def update_command(self, command, action_space="cartesian_velocity", gripper_action_space=None, blocking=False):
+        action_dict = self.create_action_dict(command, action_space=action_space, gripper_action_space=gripper_action_space)
         self.update_joints(action_dict["joint_position"], velocity=False, blocking=blocking)
         self.update_gripper(action_dict["gripper_position"], velocity=False, blocking=blocking)

@@ -177,14 +177,19 @@ def adaptive_time_to_go(self, desired_joint_position, t_min=0, t_max=4):
         clamped_time_to_go = min(t_max, max(time_to_go, t_min))
         return clamped_time_to_go

-    def create_action_dict(self, action, action_space, robot_state=None):
+    def create_action_dict(self, action, action_space, gripper_action_space=None, robot_state=None):
         assert action_space in ["cartesian_position", "joint_position", "cartesian_velocity", "joint_velocity"]
         if robot_state is None:
             robot_state = self.get_robot_state()[0]
         action_dict = {"robot_state": robot_state}
         velocity = "velocity" in action_space

-        if velocity:
+        # default the gripper to the same mode as the arm
+        if gripper_action_space is None:
+            gripper_action_space = "velocity" if velocity else "position"
+        assert gripper_action_space in ["velocity", "position"]
+
+        if gripper_action_space == "velocity":
             action_dict["gripper_velocity"] = action[-1]
             gripper_delta = self._ik_solver.gripper_velocity_to_delta(action[-1])
             gripper_position = robot_state["gripper_position"] + gripper_delta
diff --git a/droid/misc/server_interface.py b/droid/misc/server_interface.py
index 6a8d0a2..1bdfb1c 100644
--- a/droid/misc/server_interface.py
+++ b/droid/misc/server_interface.py
@@ -42,7 +42,7 @@ def launch_robot(self):
     def kill_controller(self):
         self.server.kill_controller()

-    def update_command(self, command, action_space="cartesian_velocity", blocking=False):
-        action_dict = self.server.update_command(command.tolist(), action_space, blocking)
+    def update_command(self, command, action_space="cartesian_velocity", gripper_action_space="velocity", blocking=False):
+        action_dict = self.server.update_command(command.tolist(), action_space, gripper_action_space, blocking)
         return action_dict
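The gripper branch of create_action_dict reduces to the sketch below. Only the control flow mirrors the code above; the max_delta scale is an assumed stand-in for self._ik_solver.gripper_velocity_to_delta, which performs the real conversion on the robot:

import numpy as np

def gripper_target(gripper_action, gripper_position, gripper_action_space, max_delta=0.05):
    if gripper_action_space == "velocity":
        # velocity mode: integrate the [-1, 1] action into a position target
        return float(np.clip(gripper_position + gripper_action * max_delta, 0.0, 1.0))
    # position mode: the action is already an absolute gripper position
    return float(np.clip(gripper_action, 0.0, 1.0))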
diff --git a/droid/robot_env.py b/droid/robot_env.py
index 90cb960..f555b50 100644
--- a/droid/robot_env.py
+++ b/droid/robot_env.py
@@ -13,13 +13,14 @@ class RobotEnv(gym.Env):
-    def __init__(self, action_space="cartesian_velocity", camera_kwargs={}):
+    def __init__(self, action_space="cartesian_velocity", gripper_action_space=None, camera_kwargs={}, do_reset=True):
         # Initialize Gym Environment
         super().__init__()

         # Define Action Space #
         assert action_space in ["cartesian_position", "joint_position", "cartesian_velocity", "joint_velocity"]
         self.action_space = action_space
+        self.gripper_action_space = gripper_action_space
         self.check_action_range = "velocity" in action_space

         # Robot Configuration
@@ -42,7 +43,8 @@ def __init__(self, action_space="cartesian_velocity", camera_kwargs={}):
         self.camera_type_dict = camera_type_dict

         # Reset Robot
-        self.reset()
+        if do_reset:
+            self.reset()

     def step(self, action):
         # Check Action
@@ -51,7 +53,11 @@ def step(self, action):
         assert (action.max() <= 1) and (action.min() >= -1)

         # Update Robot
-        action_info = self.update_robot(action, action_space=self.action_space)
+        action_info = self.update_robot(
+            action,
+            action_space=self.action_space,
+            gripper_action_space=self.gripper_action_space,
+        )

         # Return Action Info
         return action_info
@@ -66,8 +72,13 @@ def reset(self, randomize=False):

         self._robot.update_joints(self.reset_joints, velocity=False, blocking=True, cartesian_noise=noise)

-    def update_robot(self, action, action_space="cartesian_velocity", blocking=False):
-        action_info = self._robot.update_command(action, action_space=action_space, blocking=blocking)
+    def update_robot(self, action, action_space="cartesian_velocity", gripper_action_space=None, blocking=False):
+        action_info = self._robot.update_command(
+            action,
+            action_space=action_space,
+            gripper_action_space=gripper_action_space,
+            blocking=blocking
+        )
         return action_info

     def create_action_dict(self, action):
@@ -112,4 +123,11 @@ def get_observation(self):
         extrinsics = self.get_camera_extrinsics(state_dict)
         obs_dict["camera_extrinsics"] = extrinsics

+        intrinsics = {}
+        for cam in self.camera_reader.camera_dict.values():
+            cam_intr_info = cam.get_intrinsics()
+            for (full_cam_id, info) in cam_intr_info.items():
+                intrinsics[full_cam_id] = info["cameraMatrix"]
+        obs_dict["camera_intrinsics"] = intrinsics
+
         return obs_dict
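The new camera_intrinsics observation maps each full camera id to its 3x3 cameraMatrix, which is what lets TimestepProcesser above match entries by serial number. The structure is roughly as follows (the serial numbers and values here are made up for illustration):

import numpy as np

obs_dict["camera_intrinsics"] = {
    "12345678_left": np.array([[610.0, 0.0, 320.0],
                               [0.0, 610.0, 240.0],
                               [0.0, 0.0, 1.0]]),
    "12345678_right": np.array([[612.0, 0.0, 318.0],
                                [0.0, 612.0, 241.0],
                                [0.0, 0.0, 1.0]]),
}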
diff --git a/scripts/evaluation/evaluate_policy.py b/scripts/evaluation/evaluate_policy.py
index ffc8b54..7cc55bb 100644
--- a/scripts/evaluation/evaluate_policy.py
+++ b/scripts/evaluation/evaluate_policy.py
@@ -1,18 +1,68 @@
-from droid.evaluation.eval_launcher import eval_launcher
+from droid.evaluation.eval_launcher_robomimic import eval_launcher, get_goal_im
+import matplotlib.pyplot as plt
+import os
+import argparse
+import cv2

-variant = dict(
-    exp_name="policy_test",
-    save_data=False,
-    use_gpu=True,
-    seed=0,
-    policy_logdir="pen_cup_task/run3/id0/",
-    model_id=50,
-    camera_kwargs=dict(),
-    data_processing_kwargs=dict(
-        timestep_filtering_kwargs=dict(),
-        image_transform_kwargs=dict(),
-    ),
-)

 if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-g', '--capture_goal', action='store_true')
+    parser.add_argument('-l', '--lang_cond', action='store_true')
+    parser.add_argument('-c', '--ckpt_path', type=str, default=None,
+                        help='Path to Pytorch checkpoint (.pth) corresponding to the policy you want to evaluate.')
+    args = parser.parse_args()
+
+    variant = dict(
+        exp_name="policy_test",
+        save_data=False,
+        use_gpu=True,
+        seed=0,
+        policy_logdir="test",
+        task="",
+        layout_id=None,
+        model_id=50,
+        camera_kwargs=dict(),
+        data_processing_kwargs=dict(
+            timestep_filtering_kwargs=dict(),
+            image_transform_kwargs=dict(),
+        ),
+        ckpt_path=args.ckpt_path,
+    )
+
+    if args.capture_goal:
+        valid_values = ["yes", "no"]
+        goal_im_ok = False
+
+        while goal_im_ok is not True:
+            input("Move the robot into programming mode, adjust the scene as needed, and then press Enter to acknowledge: ")
+            input("Now move the robot into execution mode, press Enter to acknowledge: ")
+            print("Capturing Goal Image")
+            goal_ims = get_goal_im(variant, run_id=1, exp_id=0)
+            # Sort the goal_ims by key and display in a 3 x 2 grid
+            sort_goal_ims = [cv2.cvtColor(image[1][:, :, :3], cv2.COLOR_BGR2RGB) for image in sorted(goal_ims.items(), key=lambda x: x[0])]
+            fig, axes = plt.subplots(3, 2, figsize=(8, 10))
+            axes = axes.flatten()
+            for i in range(len(sort_goal_ims)):
+                axes[i].imshow(sort_goal_ims[i])
+                axes[i].axis('off')
+            plt.tight_layout()
+            plt.show()
+
+            user_input = input("Do these goal images look reasonable? Enter Yes or No: ").lower()
+            while user_input not in valid_values:
+                print("Invalid input, Please enter Yes or No")
+                user_input = input("Do these goal images look reasonable? Enter Yes or No: ").lower()
+            goal_im_ok = user_input == "yes"
+
+        input("Now reset the scene for the policy to execute, press Enter to acknowledge: ")
+
+    if args.lang_cond:
+        user_input = input("Provide a language command for the robot to complete: ").lower()
+        if not os.path.exists('eval_params'):
+            os.makedirs('eval_params')
+        with open('eval_params/lang_command.txt', 'w') as file:
+            file.write(user_input)
+
+    print("Evaluating Policy")
     eval_launcher(variant, run_id=1, exp_id=0)
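With these changes, a typical invocation points the script at a robomimic checkpoint; the flags are the ones registered above, and the checkpoint path is a placeholder:

python scripts/evaluation/evaluate_policy.py --ckpt_path /path/to/policy.pth --capture_goal --lang_cond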