diff --git a/examples/model_manager.ipynb b/examples/model_manager.ipynb index 73b72014..049d2446 100644 --- a/examples/model_manager.ipynb +++ b/examples/model_manager.ipynb @@ -81,133 +81,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Experiment: \n", - "Experiment (242317125620601262) runs:\n" - ] - }, - { - "data": { - "text/html": [ - "
[text/html output omitted: a pandas-rendered table of the single MLflow run, captioned "1 rows × 130 columns"; the same record is listed column by column in the text/plain output that follows]
" - ], - "text/plain": [ - " run_id experiment_id status \\\n", - "0 1e453cf2114d43f28410803df985598a 242317125620601262 FINISHED \n", - "\n", - " artifact_uri \\\n", - "0 mlflow-artifacts:/242317125620601262/1e453cf21... \n", - "\n", - " start_time end_time \\\n", - "0 2023-12-07 11:45:59.641000+00:00 2023-12-07 11:46:10.350000+00:00 \n", - "\n", - " metrics.Loss/entropy_loss metrics.Test/cumulative_reward \\\n", - "0 -0.687031 48.0 \n", - "\n", - " metrics.Info/ent_coef metrics.Info/learning_rate ... \\\n", - "0 0.0 0.001 ... \n", - "\n", - " params.algo/gae_lambda params.env/action_repeat params.env/grayscale \\\n", - "0 0.95 1 False \n", - "\n", - " params.metric/aggregator/metrics/Loss/policy_loss/sync_on_compute \\\n", - "0 False \n", - "\n", - " params.metric/log_level tags.mlflow.user tags.mlflow.source.type \\\n", - "0 1 mmilesi LOCAL \n", - "\n", - " tags.mlflow.runName \\\n", - "0 ppo_CartPole-v1_2023-12-07_12-45-58 \n", - "\n", - " tags.mlflow.source.name \\\n", - "0 /home/mmilesi/miniconda3/envs/sheeprl/lib/pyth... \n", - "\n", - " tags.mlflow.log-model.history \n", - "0 [{\"run_id\": \"1e453cf2114d43f28410803df985598a\"... \n", - "\n", - "[1 rows x 130 columns]" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import mlflow\n", "\n", @@ -229,26 +105,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Name: mlflow_example_agent\n", - "Description: # MODEL CHANGELOG\n", - "## **Version 1**\n", - "### Author: mmilesi\n", - "### Date: 07/12/2023 12:46:10 CET\n", - "### Description: \n", - "PPO Agent in CartPole-v1 Environment\n", - "\n", - "Tags: {}\n", - "Latest Version: 1\n" - ] - } - ], + "outputs": [], "source": [ "from sheeprl.utils.mlflow import MlflowModelManager\n", "from lightning import Fabric\n", @@ -315,33 +174,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023/12/07 12:47:03 WARNING mlflow.utils.requirements_utils: The following packages were not found in the public PyPI package index as of 2023-10-28; if these packages are not present in the public PyPI index, you must install them manually before loading your model: {'sheeprl'}\n", - "Registered model 'mlflow_example_agent' already exists. Creating a new version of this model...\n", - "2023/12/07 12:47:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. 
Model name: mlflow_example_agent, version 2\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Registered model mlflow_example_agent with version 2\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Created version '2' of model 'mlflow_example_agent'.\n" - ] - } - ], + "outputs": [], "source": [ "from sheeprl.cli import registration\n", "\n", @@ -374,31 +209,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Name: mlflow_example_agent\n", - "Description: # MODEL CHANGELOG\n", - "## **Version 1**\n", - "### Author: mmilesi\n", - "### Date: 07/12/2023 12:46:10 CET\n", - "### Description: \n", - "PPO Agent in CartPole-v1 Environment\n", - "## **Version 2**\n", - "### Author: mmilesi\n", - "### Date: 07/12/2023 12:47:04 CET\n", - "### Description: \n", - "New PPO Agent version trained in CartPole-v1 environment\n", - "\n", - "Tags: {}\n", - "Latest Version: 2\n" - ] - } - ], + "outputs": [], "source": [ "model_info = mlflow.search_registered_models(filter_string=f\"name='{model_name}'\")[-1]\n", "print(\"Name:\", model_info.name)\n", @@ -418,32 +231,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Transitioning model mlflow_example_agent version 2 from None to staging\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model_manager.transition_model(\n", " model_name=\"mlflow_example_agent\",\n", @@ -489,42 +279,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Successfully registered model 'ppo_agent_cartpole_best_reward'.\n", - "2023/12/07 12:47:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. 
Model name: ppo_agent_cartpole_best_reward, version 1\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Registered model ppo_agent_cartpole_best_reward with version 1\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Created version '1' of model 'ppo_agent_cartpole_best_reward'.\n" - ] - }, - { - "data": { - "text/plain": [ - "{'agent': }" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "models_info = {\n", " \"agent\": {\n", @@ -547,60 +304,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model named mlflow_example_agent with version 1 does not exist\n" - ] - }, - { - "data": { - "text/plain": [ - "], name='mlflow_example_agent', tags={}>" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model_manager.delete_model(\n", " model_name, int(latest_version.version) - 1, f\"Delete model version {int(latest_version.version)-1}\"\n", diff --git a/notebooks/dreamer_v3_imagination.ipynb b/notebooks/dreamer_v3_imagination.ipynb index e03e5b04..e451b62c 100644 --- a/notebooks/dreamer_v3_imagination.ipynb +++ b/notebooks/dreamer_v3_imagination.ipynb @@ -34,6 +34,17 @@ "!pip install torchvision" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"MUJOCO_GL\"] = \"egl\"" + ] + }, { "cell_type": "code", "execution_count": null, @@ -86,6 +97,7 @@ "source": [ "seed = 5\n", "fabric = Fabric(accelerator=\"cuda\", devices=1)\n", + "fabric.launch()\n", "state = fabric.load(ckpt_path)\n", "cfg = dotdict(OmegaConf.to_container(OmegaConf.load(ckpt_path.parent.parent / \"config.yaml\"), resolve=True))\n", "\n", @@ -99,7 +111,7 @@ "metadata": {}, "outputs": [], "source": [ - "envs = gym.vector.AsyncVectorEnv(\n", + "envs = gym.vector.SyncVectorEnv(\n", " [\n", " make_env(\n", " cfg,\n", @@ -187,9 +199,7 @@ " step_data[k] = obs[k][np.newaxis]\n", "step_data[\"dones\"] = np.zeros((1, cfg.env.num_envs, 1))\n", "step_data[\"rewards\"] = np.zeros((1, cfg.env.num_envs, 1))\n", - "step_data[\"is_first\"] = np.ones_like(step_data[\"dones\"])\n", - "step_data[\"stochastic_state\"] = player.stochastic_state.detach().cpu().numpy()\n", - "step_data[\"recurrent_state\"] = player.recurrent_state.detach().cpu().numpy()" + "step_data[\"is_first\"] = np.ones_like(step_data[\"dones\"])" ] }, { @@ -215,7 +225,7 @@ " for k, v in obs.items():\n", " preprocessed_obs[k] = torch.as_tensor(v[np.newaxis], dtype=torch.float32, device=fabric.device)\n", " if k in cfg.algo.cnn_keys.encoder:\n", - " preprocessed_obs[k] = preprocessed_obs[k] / 255.0\n", + " preprocessed_obs[k] = preprocessed_obs[k] / 255.0 - 0.5\n", " mask = {k: v for k, v in preprocessed_obs.items() if k.startswith(\"mask\")}\n", " if len(mask) == 0:\n", " mask = None\n", @@ -226,6 +236,8 @@ " else:\n", " real_actions = torch.stack([real_act.argmax(dim=-1) for real_act in real_actions], dim=-1).cpu().numpy()\n", "\n", + " step_data[\"stochastic_state\"] = player.stochastic_state.detach().cpu().numpy()\n", + " step_data[\"recurrent_state\"] = player.recurrent_state.detach().cpu().numpy()\n", " step_data[\"actions\"] = actions.reshape((1, cfg.env.num_envs, -1))\n", " rb_initial.add(step_data, validate_args=cfg.buffer.validate_args)\n", "\n", @@ -262,8 
+274,6 @@ " step_data[\"dones\"] = dones.reshape((1, cfg.env.num_envs, -1))\n", " step_data[\"rewards\"] = clip_rewards_fn(rewards)\n", " step_data[\"rewards\"] = clip_rewards_fn(rewards)\n", - " step_data[\"stochastic_state\"] = player.stochastic_state.detach().cpu().numpy()\n", - " step_data[\"recurrent_state\"] = player.recurrent_state.detach().cpu().numpy()\n", " dones_idxes = dones.nonzero()[0].tolist()\n", " reset_envs = len(dones_idxes)\n", " if reset_envs > 0:\n", @@ -283,7 +293,7 @@ " player.init_states(dones_idxes)\n", "\n", " ## Save the recurrent and stochastic latent states for the imagination phase\n", - " if i == initial_steps - imagination_steps - 1:\n", + " if i == initial_steps - imagination_steps:\n", " stochastic_state = player.stochastic_state.clone()\n", " recurrent_state = player.recurrent_state.clone()" ] @@ -338,10 +348,11 @@ "\n", " # imagination step\n", " stochastic_state, recurrent_state = world_model.rssm.imagination(stochastic_state, recurrent_state, actions)\n", + " stochastic_state = stochastic_state.view(1, 1, -1)\n", " # update current state\n", - " imagined_latent_states = torch.cat((stochastic_state.view(1, 1, -1), recurrent_state), -1)\n", + " imagined_latent_states = torch.cat((stochastic_state, recurrent_state), -1)\n", " rec_obs = world_model.observation_model(imagined_latent_states)\n", - " step_data[\"rgb\"] = rec_obs[\"rgb\"].unsqueeze(0).detach().cpu().numpy()\n", + " step_data[\"rgb\"] = rec_obs[\"rgb\"].unsqueeze(0).detach().cpu().numpy() + 0.5\n", " step_data[\"actions\"] = actions.unsqueeze(0).detach().cpu().numpy()\n", " rb_imagination.add(step_data)\n", "\n", @@ -408,6 +419,13 @@ "frame_one = frames[0]\n", "frame_one.save(\"real_obs.gif\", format=\"GIF\", append_images=frames, save_all=True, duration=100, loop=0)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -427,8 +445,7 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" - }, - "orig_nbformat": 4 + } }, "nbformat": 4, "nbformat_minor": 2
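
A note on the registry calls left in examples/model_manager.ipynb: sheeprl's MlflowModelManager methods used there (transition_model, delete_model) wrap MLflow's model-registry client. The sketch below shows the equivalent operations with MLflow's own MlflowClient — a placeholder tracking URI, with the model name and version numbers taken from the example outputs; it illustrates the underlying API, not sheeprl's implementation.

    from mlflow.tracking import MlflowClient

    client = MlflowClient(tracking_uri="http://localhost:5000")  # placeholder URI

    # Move version 2 of the registered model into the "staging" stage,
    # as model_manager.transition_model does in the notebook.
    client.transition_model_version_stage(
        name="mlflow_example_agent", version="2", stage="staging"
    )

    # Delete a single model version; the registered model entry itself survives.
    client.delete_model_version(name="mlflow_example_agent", version="1")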
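
Two of the notebooks/dreamer_v3_imagination.ipynb changes concern headless MuJoCo rendering: MUJOCO_GL=egl selects the EGL (GPU, display-free) backend and must be set before MuJoCo is first imported, and switching to SyncVectorEnv keeps the environments in the notebook process rather than in AsyncVectorEnv subprocess workers, which can fail to obtain a usable rendering context. A minimal standalone sketch of the same pattern, assuming Gymnasium and a MuJoCo task (Walker2d-v4 here is illustrative, not the notebook's environment):

    import os

    # Must happen before MuJoCo is first imported, or the default GL backend is used.
    os.environ["MUJOCO_GL"] = "egl"

    import gymnasium as gym

    # SyncVectorEnv steps its sub-envs sequentially in this process,
    # so all of them render through the single EGL context set up above.
    envs = gym.vector.SyncVectorEnv(
        [lambda: gym.make("Walker2d-v4", render_mode="rgb_array") for _ in range(2)]
    )
    obs, info = envs.reset(seed=5)
    print(envs.single_observation_space)
    envs.close()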
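
The "- 0.5" and "+ 0.5" edits in the same notebook are two halves of one convention: DreamerV3 feeds the encoder images centered in [-0.5, 0.5], so the decoder's reconstructions live in that range too and must be shifted back before being displayed as [0, 1] pixels. A self-contained sketch of the round trip (NumPy only, with a made-up frame in place of a real observation):

    import numpy as np

    # Stand-in for a uint8 RGB observation from the environment.
    frame = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)

    # Encoder side: scale to [0, 1], then center to [-0.5, 0.5].
    model_input = frame.astype(np.float32) / 255.0 - 0.5

    # Decoder side: reconstructions come out in the same centered range,
    # so +0.5 maps them back to displayable [0, 1] pixels.
    reconstruction = model_input  # identity stand-in for the real observation model
    displayable = reconstruction + 0.5

    assert np.allclose(displayable, frame / 255.0)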