You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboardexpand all lines: source/_static/code/en/model/rl/rl.py
+8-3
Original file line number
Diff line number
Diff line change
@@ -54,16 +54,21 @@ def predict(self, inputs):
54
54
action=action[0]
55
55
next_state, reward, done, info=env.step(action) # Let the environment to execute the action, get the next state of the action, the reward of the action, whether the game is done and extra information.
56
56
reward = -10. if done else reward  # Give a large negative reward if the game is over.
57
-
replay_buffer.append((state, action, reward, next_state, done)) # Put the (state, action, reward, next_state) quad back into the experience replay pool.
57
+
replay_buffer.append((state, action, reward, next_state, 1 if done else 0)) # Put the (state, action, reward, next_state) quad back into the experience replay pool.
58
58
state=next_state
59
59
60
60
if done: # Exit this round and enter the next episode if the game is over.
0 commit comments