diff --git a/docker/Dockerfile b/docker/Dockerfile index 6cfa367..49555b1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -30,4 +30,4 @@ ENV PYTHONPATH="${PYTHONPATH}:/app/src" COPY docker/example_agent.py ./ -CMD export DISPLAY=:0.0 && ai2thor-xorg start && /bin/bash +# CMD export DISPLAY=:0.0 && ai2thor-xorg start && /bin/bash diff --git a/docker/scripts/run.sh b/docker/scripts/run.sh index 327f55b..ecfd5eb 100755 --- a/docker/scripts/run.sh +++ b/docker/scripts/run.sh @@ -1,4 +1,6 @@ #!/bin/bash +CUSTOM_COMMAND="$1" + DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) cd "$DIR"/../ || exit @@ -22,7 +24,9 @@ docker run --privileged $X11_PARAMS -it \ --mount type=bind,source="$(pwd)"/../runs/,target=/app/runs/ \ --mount type=bind,source="$(pwd)"/../checkpoints/,target=/app/checkpoints/ \ --mount type=bind,source="$(pwd)"/../wandb/,target=/app/wandb/ \ -rlthor-docker:latest +--mount type=bind,source="$(pwd)"/../examples/benchmark/script/,target=/app/scripts/ \ +rlthor-docker:latest /bin/bash -c "export DISPLAY=:0.0 && ai2thor-xorg start && $CUSTOM_COMMAND" + if [[ -e /tmp/.X11-unix && ! -z ${DISPLAY+x} ]]; then xhost -local:root diff --git a/examples/benchmark/config/experiment_config.yaml b/examples/benchmark/config/experiment_config.yaml index 6ba6708..67e6098 100644 --- a/examples/benchmark/config/experiment_config.yaml +++ b/examples/benchmark/config/experiment_config.yaml @@ -3,11 +3,11 @@ wandb: sync_tensorboard: true monitor_gym: true save_code: true - mode: offline + mode: online #offline sb3_callback: verbose: 2 model_save_freq: 100_000 - gradient_save_freq: 100_000 + gradient_save_freq: 500_000 evaluation: nb_episodes: 10 diff --git a/examples/benchmark/experiment_utils.py b/examples/benchmark/experiment_utils.py index f24fa51..35ec778 100644 --- a/examples/benchmark/experiment_utils.py +++ b/examples/benchmark/experiment_utils.py @@ -11,7 +11,7 @@ import yaml from stable_baselines3.common.vec_env import DummyVecEnv -from rl_thor.envs.ai2thor_envs import BaseAI2THOREnv, ITHOREnv +from rl_thor.envs.ai2thor_envs import BaseAI2THOREnv # TODO: Handle config path better experiment_config_path = Path(__file__).parent / "config/experiment_config.yaml" @@ -36,6 +36,7 @@ class Exp: model: str tasks: Iterable[str] scenes: set[str] + seed: int job_type: str = "train" id: str | None = None experiment_config_path: Path = experiment_config_path @@ -58,7 +59,8 @@ def __post_init__(self) -> None: @property def name(self) -> str: """Return the name of the experiment.""" - return f"{self.model}_{"-".join(self.tasks)}_{"-".join(self.sorted_scenes)}_{self.timestamp}" + # return f"{self.model}_{"-".join(self.tasks)}_{"-".join(self.sorted_scenes)}_{self.seed}_{self.timestamp}" + return f"{self.model}_{"-".join(self.tasks)}_{len(self.sorted_scenes)}-scenes_{self.seed}_{self.timestamp}" # TODO: Improve group naming @property diff --git a/examples/benchmark/model_info.py b/examples/benchmark/model_info.py index 41b1392..75f1237 100644 --- a/examples/benchmark/model_info.py +++ b/examples/benchmark/model_info.py @@ -18,7 +18,7 @@ class ModelType(StrEnum): MODEL_CONFIG = { "verbose": 1, - "progress_bar": True, + "progress_bar": False, } diff --git a/examples/benchmark/train.py b/examples/benchmark/train.py index 10b2aea..0ab25e8 100644 --- a/examples/benchmark/train.py +++ b/examples/benchmark/train.py @@ -1,5 +1,6 @@ """Run a stable-baselines3 agent in the AI2THOR RL environment.""" +import sys from pathlib import Path from typing import TYPE_CHECKING, Annotated, Any, Optional @@ -59,11 +60,11 @@ def make_env( def main( - task: AvailableTask, + task: AvailableTask = AvailableTask.PREPARE_MEAL, nb_scenes: int = 1, model_name: Annotated[ModelType, typer.Option("--model", case_sensitive=False)] = ModelType.PPO, rollout_length: Annotated[Optional[int], typer.Option("--rollout", "-r")] = None, # noqa: UP007 - total_timesteps: Annotated[int, typer.Option("--timesteps", "-s")] = 1_000_000, + total_timesteps: Annotated[int, typer.Option("--timesteps", "-s")] = 3_000_000, record: bool = False, log_full_env_metrics: Annotated[bool, typer.Option("--log-metrics", "-l")] = False, no_task_advancement_reward: Annotated[bool, typer.Option("--no-adv", "-n")] = False, @@ -101,7 +102,14 @@ def main( scenes = {scenes for task_config in task_blueprint_config for scenes in task_config["scenes"]} # === Load the environment and experiment configurations === - experiment = Exp(model=model_name, tasks=[task], scenes=scenes, project_name=project_name, group_name=group_name) + experiment = Exp( + model=model_name, + tasks=[task], + scenes=scenes, + seed=seed, + project_name=project_name, + group_name=group_name, + ) config_override: dict[str, Any] = {"tasks": {"task_blueprints": task_blueprint_config}} config_override["no_task_advancement_reward"] = no_task_advancement_reward if rollout_length is not None: @@ -111,7 +119,7 @@ def main( # Add action groups override config config_override.update(get_action_groups_override_config(task)) wandb_config = experiment.config["wandb"] - tags = ["simple_actions", "single_task", model_name, *scenes, task, experiment.job_type, wandb_config["project"]] + tags = ["simple_actions", model_name, *scenes, task, experiment.job_type, wandb_config["project"]] tags.extend(( "single_task" if is_single_task else "multi_task", experiment.group_name if experiment.group_name is not None else "no_group", @@ -122,6 +130,7 @@ def main( "randomize_agent_position" if randomize_agent_position else "no_randomize_agent_position", )) + # wandb.require("core") run: Run = wandb.init( # type: ignore config=experiment.config | env_config | {"tasks": {"task_blueprints": task_blueprint_config}}, mode=wandb_config["mode"], @@ -138,7 +147,13 @@ def main( # Save infos about the run experiment.log_dir.mkdir(parents=True, exist_ok=True) run_info_path = experiment.log_dir / "run_info.yaml" - run_info = {"tags": tags, "env_config": env_config, "experiment_config": experiment.config} + run_info = { + "tags": tags, + "env_config": env_config, + "experiment_config": experiment.config, + "finished": False, + "command": " ".join(sys.argv[:]), + } with run_info_path.open("w") as f: yaml.dump(run_info, f) @@ -227,6 +242,10 @@ def main( callback=CallbackList(callbacks), ) + with run_info_path.open("w") as f: + run_info["finished"] = True + yaml.dump(run_info, f) + env.close() run.finish() diff --git a/src/rl_thor/envs/ai2thor_envs.py b/src/rl_thor/envs/ai2thor_envs.py index 5791308..9c2bae8 100644 --- a/src/rl_thor/envs/ai2thor_envs.py +++ b/src/rl_thor/envs/ai2thor_envs.py @@ -500,7 +500,7 @@ def reset( observation (dict[str, NDArray[np.uint8] | str]): Observation of the environment. info (dict): Additional information about the environment. """ - print("Resetting environment.") + # print("Resetting environment.") super().reset(seed=seed, options=options) if options is None: options = {} @@ -619,10 +619,10 @@ def _sample_scene_and_reset_controller_task_reward( task_completion = False # Repeat until a compatible scene is found and remove incompatible ones from the task blueprint while not successful_reset or task_completion: - print(f"Sampling a scene from the task blueprint {task_blueprint.task_type.__name__}.") + # print(f"Sampling a scene from the task blueprint {task_blueprint.task_type.__name__}.") sorted_scenes = sorted(task_blueprint.scenes) sampled_scene = self.np_random.choice(sorted_scenes) - print(f"Sampled scene: {sampled_scene}.") + # print(f"Sampled scene: {sampled_scene}.") successful_reset, task_completion, task_info, scene_initialization_time = ( self._reset_controller_task_reward(sampled_scene)