update docker scripts and slightly improve training

Kajiih · Aug 28, 2024 · 8ff51e1
1 parent d38ac20
commit 8ff51e1
Show file tree

Hide file tree

Showing 7 changed files with 40 additions and 15 deletions.
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -30,4 +30,4 @@ ENV PYTHONPATH="${PYTHONPATH}:/app/src"
 COPY docker/example_agent.py ./
 
 
-CMD export DISPLAY=:0.0 && ai2thor-xorg start && /bin/bash
+# CMD export DISPLAY=:0.0 && ai2thor-xorg start && /bin/bash
diff --git a/docker/scripts/run.sh b/docker/scripts/run.sh
@@ -1,4 +1,6 @@
 #!/bin/bash
+CUSTOM_COMMAND="$1"
+
 DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
 cd "$DIR"/../ || exit
 
@@ -22,7 +24,9 @@ docker run --privileged $X11_PARAMS -it \
 --mount type=bind,source="$(pwd)"/../runs/,target=/app/runs/ \
 --mount type=bind,source="$(pwd)"/../checkpoints/,target=/app/checkpoints/ \
 --mount type=bind,source="$(pwd)"/../wandb/,target=/app/wandb/ \
-rlthor-docker:latest
+--mount type=bind,source="$(pwd)"/../examples/benchmark/script/,target=/app/scripts/ \
+rlthor-docker:latest /bin/bash -c "export DISPLAY=:0.0 && ai2thor-xorg start && $CUSTOM_COMMAND"
+
 
 if [[ -e /tmp/.X11-unix && ! -z ${DISPLAY+x} ]]; then
     xhost -local:root

diff --git a/examples/benchmark/config/experiment_config.yaml b/examples/benchmark/config/experiment_config.yaml
@@ -3,11 +3,11 @@ wandb:
   sync_tensorboard: true
   monitor_gym: true
   save_code: true
-  mode: offline
+  mode: online #offline
   sb3_callback:
     verbose: 2
     model_save_freq: 100_000
-    gradient_save_freq: 100_000
+    gradient_save_freq: 500_000
 
 evaluation:
   nb_episodes: 10

diff --git a/examples/benchmark/experiment_utils.py b/examples/benchmark/experiment_utils.py
@@ -11,7 +11,7 @@
 import yaml
 from stable_baselines3.common.vec_env import DummyVecEnv
 
-from rl_thor.envs.ai2thor_envs import BaseAI2THOREnv, ITHOREnv
+from rl_thor.envs.ai2thor_envs import BaseAI2THOREnv
 
 # TODO: Handle config path better
 experiment_config_path = Path(__file__).parent / "config/experiment_config.yaml"
@@ -36,6 +36,7 @@ class Exp:
     model: str
     tasks: Iterable[str]
     scenes: set[str]
+    seed: int
     job_type: str = "train"
     id: str | None = None
     experiment_config_path: Path = experiment_config_path
@@ -58,7 +59,8 @@ def __post_init__(self) -> None:
     @property
     def name(self) -> str:
         """Return the name of the experiment."""
-        return f"{self.model}_{"-".join(self.tasks)}_{"-".join(self.sorted_scenes)}_{self.timestamp}"
+        # return f"{self.model}_{"-".join(self.tasks)}_{"-".join(self.sorted_scenes)}_{self.seed}_{self.timestamp}"
+        return f"{self.model}_{"-".join(self.tasks)}_{len(self.sorted_scenes)}-scenes_{self.seed}_{self.timestamp}"
 
     # TODO: Improve group naming
     @property

diff --git a/examples/benchmark/model_info.py b/examples/benchmark/model_info.py
@@ -18,7 +18,7 @@ class ModelType(StrEnum):
 
 MODEL_CONFIG = {
     "verbose": 1,
-    "progress_bar": True,
+    "progress_bar": False,
 }
 
 

diff --git a/examples/benchmark/train.py b/examples/benchmark/train.py
@@ -1,5 +1,6 @@
 """Run a stable-baselines3 agent in the AI2THOR RL environment."""
 
+import sys
 from pathlib import Path
 from typing import TYPE_CHECKING, Annotated, Any, Optional
 
@@ -59,11 +60,11 @@ def make_env(
 
 
 def main(
-    task: AvailableTask,
+    task: AvailableTask = AvailableTask.PREPARE_MEAL,
     nb_scenes: int = 1,
     model_name: Annotated[ModelType, typer.Option("--model", case_sensitive=False)] = ModelType.PPO,
     rollout_length: Annotated[Optional[int], typer.Option("--rollout", "-r")] = None,  # noqa: UP007
-    total_timesteps: Annotated[int, typer.Option("--timesteps", "-s")] = 1_000_000,
+    total_timesteps: Annotated[int, typer.Option("--timesteps", "-s")] = 3_000_000,
     record: bool = False,
     log_full_env_metrics: Annotated[bool, typer.Option("--log-metrics", "-l")] = False,
     no_task_advancement_reward: Annotated[bool, typer.Option("--no-adv", "-n")] = False,
@@ -101,7 +102,14 @@ def main(
     scenes = {scenes for task_config in task_blueprint_config for scenes in task_config["scenes"]}
 
     # === Load the environment and experiment configurations ===
-    experiment = Exp(model=model_name, tasks=[task], scenes=scenes, project_name=project_name, group_name=group_name)
+    experiment = Exp(
+        model=model_name,
+        tasks=[task],
+        scenes=scenes,
+        seed=seed,
+        project_name=project_name,
+        group_name=group_name,
+    )
     config_override: dict[str, Any] = {"tasks": {"task_blueprints": task_blueprint_config}}
     config_override["no_task_advancement_reward"] = no_task_advancement_reward
     if rollout_length is not None:
@@ -111,7 +119,7 @@ def main(
     # Add action groups override config
     config_override.update(get_action_groups_override_config(task))
     wandb_config = experiment.config["wandb"]
-    tags = ["simple_actions", "single_task", model_name, *scenes, task, experiment.job_type, wandb_config["project"]]
+    tags = ["simple_actions", model_name, *scenes, task, experiment.job_type, wandb_config["project"]]
     tags.extend((
         "single_task" if is_single_task else "multi_task",
         experiment.group_name if experiment.group_name is not None else "no_group",
@@ -122,6 +130,7 @@ def main(
         "randomize_agent_position" if randomize_agent_position else "no_randomize_agent_position",
     ))
 
+    # wandb.require("core")
     run: Run = wandb.init(  # type: ignore
         config=experiment.config | env_config | {"tasks": {"task_blueprints": task_blueprint_config}},
         mode=wandb_config["mode"],
@@ -138,7 +147,13 @@ def main(
     # Save infos about the run
     experiment.log_dir.mkdir(parents=True, exist_ok=True)
     run_info_path = experiment.log_dir / "run_info.yaml"
-    run_info = {"tags": tags, "env_config": env_config, "experiment_config": experiment.config}
+    run_info = {
+        "tags": tags,
+        "env_config": env_config,
+        "experiment_config": experiment.config,
+        "finished": False,
+        "command": " ".join(sys.argv[:]),
+    }
     with run_info_path.open("w") as f:
         yaml.dump(run_info, f)
 
@@ -227,6 +242,10 @@ def main(
             callback=CallbackList(callbacks),
         )
 
+    with run_info_path.open("w") as f:
+        run_info["finished"] = True
+        yaml.dump(run_info, f)
+
     env.close()
     run.finish()
 

diff --git a/src/rl_thor/envs/ai2thor_envs.py b/src/rl_thor/envs/ai2thor_envs.py
@@ -500,7 +500,7 @@ def reset(
             observation (dict[str, NDArray[np.uint8] | str]): Observation of the environment.
             info (dict): Additional information about the environment.
         """
-        print("Resetting environment.")
+        # print("Resetting environment.")
         super().reset(seed=seed, options=options)
         if options is None:
             options = {}
@@ -619,10 +619,10 @@ def _sample_scene_and_reset_controller_task_reward(
         task_completion = False
         # Repeat until a compatible scene is found and remove incompatible ones from the task blueprint
         while not successful_reset or task_completion:
-            print(f"Sampling a scene from the task blueprint {task_blueprint.task_type.__name__}.")
+            # print(f"Sampling a scene from the task blueprint {task_blueprint.task_type.__name__}.")
             sorted_scenes = sorted(task_blueprint.scenes)
             sampled_scene = self.np_random.choice(sorted_scenes)
-            print(f"Sampled scene: {sampled_scene}.")
+            # print(f"Sampled scene: {sampled_scene}.")
 
             successful_reset, task_completion, task_info, scene_initialization_time = (
                 self._reset_controller_task_reward(sampled_scene)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -30,4 +30,4 @@ ENV PYTHONPATH="${PYTHONPATH}:/app/src"
		COPY docker/example_agent.py ./


		-CMD export DISPLAY=:0.0 && ai2thor-xorg start && /bin/bash
		+# CMD export DISPLAY=:0.0 && ai2thor-xorg start && /bin/bash