Skip to content

Commit

Permalink
update docker scripts and slightly improve training
Browse files: browse the repository at this point in the history
  • Loading branch information
Kajiih committed Aug 28, 2024
1 parent d38ac20 commit 8ff51e1
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 15 deletions.
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ ENV PYTHONPATH="${PYTHONPATH}:/app/src"
COPY docker/example_agent.py ./


CMD export DISPLAY=:0.0 && ai2thor-xorg start && /bin/bash
# CMD export DISPLAY=:0.0 && ai2thor-xorg start && /bin/bash
6 changes: 5 additions & 1 deletion docker/scripts/run.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/bin/bash
CUSTOM_COMMAND="$1"

DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
cd "$DIR"/../ || exit

Expand All @@ -22,7 +24,9 @@ docker run --privileged $X11_PARAMS -it \
--mount type=bind,source="$(pwd)"/../runs/,target=/app/runs/ \
--mount type=bind,source="$(pwd)"/../checkpoints/,target=/app/checkpoints/ \
--mount type=bind,source="$(pwd)"/../wandb/,target=/app/wandb/ \
rlthor-docker:latest
--mount type=bind,source="$(pwd)"/../examples/benchmark/script/,target=/app/scripts/ \
rlthor-docker:latest /bin/bash -c "export DISPLAY=:0.0 && ai2thor-xorg start && $CUSTOM_COMMAND"


if [[ -e /tmp/.X11-unix && ! -z ${DISPLAY+x} ]]; then
xhost -local:root
Expand Down
4 changes: 2 additions & 2 deletions examples/benchmark/config/experiment_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ wandb:
sync_tensorboard: true
monitor_gym: true
save_code: true
mode: offline
mode: online #offline
sb3_callback:
verbose: 2
model_save_freq: 100_000
gradient_save_freq: 100_000
gradient_save_freq: 500_000

evaluation:
nb_episodes: 10
Expand Down
6 changes: 4 additions & 2 deletions examples/benchmark/experiment_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import yaml
from stable_baselines3.common.vec_env import DummyVecEnv

from rl_thor.envs.ai2thor_envs import BaseAI2THOREnv, ITHOREnv
from rl_thor.envs.ai2thor_envs import BaseAI2THOREnv

# TODO: Handle config path better
experiment_config_path = Path(__file__).parent / "config/experiment_config.yaml"
Expand All @@ -36,6 +36,7 @@ class Exp:
model: str
tasks: Iterable[str]
scenes: set[str]
seed: int
job_type: str = "train"
id: str | None = None
experiment_config_path: Path = experiment_config_path
Expand All @@ -58,7 +59,8 @@ def __post_init__(self) -> None:
@property
def name(self) -> str:
"""Return the name of the experiment."""
return f"{self.model}_{"-".join(self.tasks)}_{"-".join(self.sorted_scenes)}_{self.timestamp}"
# return f"{self.model}_{"-".join(self.tasks)}_{"-".join(self.sorted_scenes)}_{self.seed}_{self.timestamp}"
return f"{self.model}_{"-".join(self.tasks)}_{len(self.sorted_scenes)}-scenes_{self.seed}_{self.timestamp}"

# TODO: Improve group naming
@property
Expand Down
2 changes: 1 addition & 1 deletion examples/benchmark/model_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class ModelType(StrEnum):

MODEL_CONFIG = {
"verbose": 1,
"progress_bar": True,
"progress_bar": False,
}


Expand Down
29 changes: 24 additions & 5 deletions examples/benchmark/train.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Run a stable-baselines3 agent in the AI2THOR RL environment."""

import sys
from pathlib import Path
from typing import TYPE_CHECKING, Annotated, Any, Optional

Expand Down Expand Up @@ -59,11 +60,11 @@ def make_env(


def main(
task: AvailableTask,
task: AvailableTask = AvailableTask.PREPARE_MEAL,
nb_scenes: int = 1,
model_name: Annotated[ModelType, typer.Option("--model", case_sensitive=False)] = ModelType.PPO,
rollout_length: Annotated[Optional[int], typer.Option("--rollout", "-r")] = None, # noqa: UP007
total_timesteps: Annotated[int, typer.Option("--timesteps", "-s")] = 1_000_000,
total_timesteps: Annotated[int, typer.Option("--timesteps", "-s")] = 3_000_000,
record: bool = False,
log_full_env_metrics: Annotated[bool, typer.Option("--log-metrics", "-l")] = False,
no_task_advancement_reward: Annotated[bool, typer.Option("--no-adv", "-n")] = False,
Expand Down Expand Up @@ -101,7 +102,14 @@ def main(
scenes = {scenes for task_config in task_blueprint_config for scenes in task_config["scenes"]}

# === Load the environment and experiment configurations ===
experiment = Exp(model=model_name, tasks=[task], scenes=scenes, project_name=project_name, group_name=group_name)
experiment = Exp(
model=model_name,
tasks=[task],
scenes=scenes,
seed=seed,
project_name=project_name,
group_name=group_name,
)
config_override: dict[str, Any] = {"tasks": {"task_blueprints": task_blueprint_config}}
config_override["no_task_advancement_reward"] = no_task_advancement_reward
if rollout_length is not None:
Expand All @@ -111,7 +119,7 @@ def main(
# Add action groups override config
config_override.update(get_action_groups_override_config(task))
wandb_config = experiment.config["wandb"]
tags = ["simple_actions", "single_task", model_name, *scenes, task, experiment.job_type, wandb_config["project"]]
tags = ["simple_actions", model_name, *scenes, task, experiment.job_type, wandb_config["project"]]
tags.extend((
"single_task" if is_single_task else "multi_task",
experiment.group_name if experiment.group_name is not None else "no_group",
Expand All @@ -122,6 +130,7 @@ def main(
"randomize_agent_position" if randomize_agent_position else "no_randomize_agent_position",
))

# wandb.require("core")
run: Run = wandb.init( # type: ignore
config=experiment.config | env_config | {"tasks": {"task_blueprints": task_blueprint_config}},
mode=wandb_config["mode"],
Expand All @@ -138,7 +147,13 @@ def main(
# Save information about the run
experiment.log_dir.mkdir(parents=True, exist_ok=True)
run_info_path = experiment.log_dir / "run_info.yaml"
run_info = {"tags": tags, "env_config": env_config, "experiment_config": experiment.config}
run_info = {
"tags": tags,
"env_config": env_config,
"experiment_config": experiment.config,
"finished": False,
"command": " ".join(sys.argv[:]),
}
with run_info_path.open("w") as f:
yaml.dump(run_info, f)

Expand Down Expand Up @@ -227,6 +242,10 @@ def main(
callback=CallbackList(callbacks),
)

with run_info_path.open("w") as f:
run_info["finished"] = True
yaml.dump(run_info, f)

env.close()
run.finish()

Expand Down
6 changes: 3 additions & 3 deletions src/rl_thor/envs/ai2thor_envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ def reset(
observation (dict[str, NDArray[np.uint8] | str]): Observation of the environment.
info (dict): Additional information about the environment.
"""
print("Resetting environment.")
# print("Resetting environment.")
super().reset(seed=seed, options=options)
if options is None:
options = {}
Expand Down Expand Up @@ -619,10 +619,10 @@ def _sample_scene_and_reset_controller_task_reward(
task_completion = False
# Repeat until a compatible scene is found and remove incompatible ones from the task blueprint
while not successful_reset or task_completion:
print(f"Sampling a scene from the task blueprint {task_blueprint.task_type.__name__}.")
# print(f"Sampling a scene from the task blueprint {task_blueprint.task_type.__name__}.")
sorted_scenes = sorted(task_blueprint.scenes)
sampled_scene = self.np_random.choice(sorted_scenes)
print(f"Sampled scene: {sampled_scene}.")
# print(f"Sampled scene: {sampled_scene}.")

successful_reset, task_completion, task_info, scene_initialization_time = (
self._reset_controller_task_reward(sampled_scene)
Expand Down

0 comments on commit 8ff51e1

Please sign in to comment.