def get_env_feedback(S, A, n_states=None):
    """Advance the agent one step and return the next state and the reward.

    This is how the agent interacts with the environment.

    NOTE(review): a second definition of ``get_env_feedback`` appears later
    in this file; at import time that later definition replaces this one.

    Args:
        S: Current state, an integer position index.
        A: Action taken. ``'right'`` moves one step to the right; any other
            value is treated as a move to the left.
        n_states: Number of states in the environment. When omitted, the
            module-level ``N_STATES`` is used, exactly as before.

    Returns:
        A ``(S_, R)`` tuple. ``S_`` is the string ``'terminal'`` and ``R`` is
        1 when the move lands on index ``n_states - 1``; otherwise ``S_`` is
        the new integer position and ``R`` is 0.
    """
    if n_states is None:
        # Resolved at call time so existing two-argument callers are unaffected.
        n_states = N_STATES

    if A == 'right':
        S_ = S + 1
    else:  # move left
        S_ = S - 1

    # Stepping off the left edge leaves the agent at position 0.
    if S_ == -1:
        S_ = 0

    # Landing on the last index ends the episode with a reward of 1.
    if S_ == n_states - 1:
        return 'terminal', 1
    return S_, 0
def get_env_feedback(S, A, n_states=None):
    """Advance the agent one step and return the next state and the reward.

    This is how the agent interacts with the environment.

    NOTE(review): this redefines a ``get_env_feedback`` declared earlier in
    this file; being the later definition, it is the one left bound to the
    name after import.

    Args:
        S: Current state, an integer position index.
        A: Action taken. ``'right'`` moves one step to the right; any other
            value is treated as a move to the left.
        n_states: Number of states in the environment. When omitted, the
            module-level ``N_STATES`` is used, exactly as before.

    Returns:
        A ``(S_, R)`` tuple. Moving right from index ``n_states - 2`` yields
        ``('terminal', 1)``. Every other move yields the new position with a
        reward of 0; moving left from position 0 leaves the state unchanged.
    """
    if n_states is None:
        # Resolved at call time so existing two-argument callers are unaffected.
        n_states = N_STATES

    if A == 'right':
        # The step from the second-to-last index reaches the goal.
        if S == n_states - 2:
            return 'terminal', 1
        return S + 1, 0

    # move left: position 0 is the wall, so the agent stays put there.
    S_ = S if S == 0 else S - 1
    return S_, 0
# The approach below looks rather odd -- was it written as "oral coding"?