第一章 初识强化学习

实例——“小车上山”

步骤1，获取环境对象

import gym  # required by gym.make below; the original snippet never imported it

# Create the MountainCar environment and print a summary of its spaces.
env = gym.make('MountainCar-v0')
print('观测空间 = {}'.format(env.observation_space))
print('动作空间 = {}'.format(env.action_space))
# Lower and upper bounds of each observation component.
print('观测范围 = {} ~ {}'.format(env.observation_space.low,
        env.observation_space.high))
# Number of discrete actions available to the agent.
print('动作数 = {}'.format(env.action_space.n))

观测空间 = Box(2,) —— 观测是形状为 (2,) 的浮点型 np.array：第 0 个元素是小车位置 x，第 1 个元素是小车速度

动作空间 = Discrete(3) —— 共 3 个离散动作：{0：向左施力，1：不施力，2：向右施力}；终点在右侧山顶，所以“上山的力”指向右施力，“下山的力”指向左施力

观测范围 = [-1.2 -0.07] ~ [0.6 0.07]

动作数 = 3

步骤2，创建智能体——上山/下山的原理我没有完全看懂；从代码看，智能体先根据位置算出速度的下界 lb 和上界 ub，速度严格落在 (lb, ub) 之内时选择动作 2，否则选择动作 0

class BespokeAgent:
    """Hand-written, non-learning agent driven by a fixed velocity-band rule."""

    def __init__(self, env):
        """Accept the environment argument; nothing from it is stored."""

    def decide(self, observation):
        """Choose an action from a ``(position, velocity)`` observation.

        Two position-dependent bounds are computed. Action ``2`` is returned
        when the velocity lies strictly between them, otherwise action ``0``.
        The observation is also echoed to stdout on every call.
        """
        print('observation',observation)
        pos, vel = observation
        # The lower bound is the smaller of a quadratic and a quartic curve.
        quadratic_floor = -0.09 * (pos + 0.25) ** 2 + 0.03
        quartic_floor = 0.3 * (pos + 0.9) ** 4 - 0.008
        lower = min(quadratic_floor, quartic_floor)
        upper = -0.07 * (pos + 0.38) ** 2 + 0.07
        return 2 if lower < vel < upper else 0

    def learn(self, *args):
        """Ignore all arguments; this agent has nothing to learn."""


# Create the rule-based agent; its __init__ accepts env but does not use it.
agent = BespokeAgent(env)

步骤3，智能体与环境进行交互

def play_montecarlo(env, agent, render=False, train=False):
    """Play one episode of ``agent`` in ``env`` and return the total reward.

    Both Gym calling conventions are accepted:

    * ``reset()`` returning the observation, or an ``(observation, info)``
      pair whose second item is a dict;
    * ``step()`` returning ``(obs, reward, done, info)``, or
      ``(obs, reward, terminated, truncated, info)``.

    Args:
        env: Environment providing ``reset()``, ``step(action)`` and, when
            ``render`` is true, ``render()``.
        agent: Object providing ``decide(observation)`` and, when ``train``
            is true, ``learn(observation, action, reward, done)``.
        render: If true, call ``env.render()`` before every step.
        train: If true, call ``agent.learn`` after every step.

    Returns:
        The sum of the rewards collected during the episode.
    """
    episode_reward = 0.  # running total of the episode's rewards
    reset_result = env.reset()  # start a new episode
    # Only unwrap an (observation, info) pair; a plain tuple observation
    # whose second item is not a dict is left untouched.
    if (isinstance(reset_result, tuple) and len(reset_result) == 2
            and isinstance(reset_result[1], dict)):
        observation = reset_result[0]
    else:
        observation = reset_result
    print('start_observation',observation)
    while True:  # loop until the episode ends
        if render:
            env.render()  # the window can be closed later with env.close()
        action = agent.decide(observation)
        step_result = env.step(action)  # apply the action
        if len(step_result) == 5:
            # Five-item form: the episode ends on termination or truncation.
            next_observation, reward, terminated, truncated, _ = step_result
            done = terminated or truncated
        else:
            next_observation, reward, done, _ = step_result
        episode_reward += reward
        if train:
            agent.learn(observation, action, reward, done)
        if done:
            break
        observation = next_observation
    return episode_reward

# Seed the environment so the result can be reproduced exactly; this line
# can normally be removed.
env.seed(0)
try:
    episode_reward = play_montecarlo(env, agent, render=True)
    print('回合奖励 = {}'.format(episode_reward))
finally:
    # Close the rendering window even if the episode raised an exception.
    env.close()

来路与归途

发布了109 篇原创文章 · 获赞 22 · 访问量 2万+

私信关注

第一章 初始强化学习

猜你喜欢

第一章初始强化学习