This repository was archived by the owner on Aug 22, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_high_level.py
78 lines (67 loc) · 2.33 KB
/
run_high_level.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from analysis import Analysis
from simulator.gym_wrappers import PacBotEnv
from simulator.visualizer import Visualizer
from simulator.game_engine.variables import up, down, left
from constants import UP, DOWN, LEFT, RIGHT
from policies.high_level_policy import HighLevelPolicy
import time
import pygame
import argparse
def convert_direction(direction):
    """Translate a simulator direction constant into the policy-side
    direction constant.

    The pairing (up->RIGHT, down->LEFT, left->UP, anything else->DOWN)
    appears to be a fixed 90-degree rotation between the two coordinate
    conventions — confirm against the simulator/visualizer axes.
    """
    # Table-driven translation; compared with == just like the original
    # if-chain, so no hashability assumption on the simulator constants.
    translation = (
        (up, RIGHT),
        (down, LEFT),
        (left, UP),
    )
    for sim_dir, policy_dir in translation:
        if direction == sim_dir:
            return policy_dir
    # The simulator's remaining direction ("right") falls through here.
    return DOWN
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--s", help="Turn on slow mode", action="store_true")
    parser.add_argument("--o", help="Output file name", type=str)
    parser.add_argument("--t", help="Number of trials", type=int, default=5)
    # NOTE(review): store_false means debug defaults to True and passing
    # --d turns it OFF — confirm this inversion is intended.
    parser.add_argument("--d", help="Toggle debug", action="store_false")
    args = parser.parse_args()

    # Set up the environment, its on-screen visualization, the policy
    # under evaluation, and the analysis/replay logger.
    visualizer = Visualizer()
    env = PacBotEnv(speed=0.5)
    obs = env.reset()
    grid = env.render()
    visualizer.draw_grid(grid, env.orientation, env.game_state.state)
    policy = HighLevelPolicy(debug=args.d)
    analysis = Analysis(args.o)

    done = False  # was assigned twice in the original; once is enough
    key = 0
    trials = 0
    extra = {"life_lost": False}
    # Pellet count of the worst losing game seen so far (more pellets
    # left uneaten = worse); only the worst game's replay is written.
    worst_loss = float("-inf")

    # Run episodes until the user presses 'q' in the visualizer window
    # or the requested number of trials has finished.
    while key != pygame.K_q and trials < args.t:
        state = policy.get_state(env, obs, done, extra)
        analysis.log_replay(env, state, extra["life_lost"], policy)
        # Time only the policy's decision step.
        pre = time.time()
        action = policy.get_action(state)
        analysis.log_calc(time.time() - pre)
        obs, reward, done, extra = env.step(action)
        grid = env.render()
        visualizer.draw_grid(grid, env.orientation, env.game_state.state)
        if done:
            analysis.log_endgame(env)
            trials += 1
            # Only losing games (pellets left uneaten) compete for the
            # saved "worst loss" replay.
            if not env.game_state._are_all_pellets_eaten():
                if env.game_state.pellets > worst_loss:
                    worst_loss = env.game_state.pellets
                    analysis.write_replay(env)
            analysis.reset()
            policy.reset()
            obs = env.reset()
            env.render()
        key = visualizer.wait_ai_control()
        if args.s:
            time.sleep(0.1)
    analysis.write()