A simple and complete Q-Learner implementation for solving the Mountain Car problem
In this section, we will put the whole code together into a single Python script that initializes the environment, launches the agent's training process, obtains the trained policy, tests the agent's performance, and also records how it acts in the environment!
#!/usr/bin/env python
import gym
import numpy as np

MAX_NUM_EPISODES = 50000
STEPS_PER_EPISODE = 200  # This is specific to MountainCar. May change with env
EPSILON_MIN = 0.005
max_num_steps = MAX_NUM_EPISODES * STEPS_PER_EPISODE
EPSILON_DECAY = 500 * EPSILON_MIN / max_num_steps
ALPHA = 0.05  # Learning rate
GAMMA = 0.98  # Discount factor
NUM_DISCRETE_BINS = 30  # Number of bins to discretize each observation dim


class Q_Learner(object):
    def __init__(self, env):
        self.obs_shape = env.observation_space.shape
        self.obs_high = env.observation_space.high
        self.obs_low = env.observation_space.low
        self.obs_bins = NUM_DISCRETE_BINS
        ...
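The listing above breaks off inside the agent's constructor. As a stop-gap for readers who want something runnable right away, the following is a minimal sketch of how the remaining pieces (observation discretization, epsilon-greedy action selection, the one-step Q-learning update, and a train/test driver with recording) might be filled in. The method names discretize, get_action, and learn, the train and test helpers, the gym_monitor_output path, and the use of gym.wrappers.Monitor are illustrative assumptions rather than the exact continuation of the listing, and the whole sketch assumes an older Gym API (pre-0.26) where reset() returns only the observation, step() returns four values, and the Monitor wrapper is still available.

#!/usr/bin/env python
# Hedged sketch only: method names and the train/test driver are illustrative
# assumptions, not the exact continuation of the listing above.
import gym
import numpy as np

MAX_NUM_EPISODES = 50000
STEPS_PER_EPISODE = 200
EPSILON_MIN = 0.005
max_num_steps = MAX_NUM_EPISODES * STEPS_PER_EPISODE
EPSILON_DECAY = 500 * EPSILON_MIN / max_num_steps
ALPHA = 0.05   # Learning rate
GAMMA = 0.98   # Discount factor
NUM_DISCRETE_BINS = 30


class Q_Learner(object):
    def __init__(self, env):
        self.obs_high = env.observation_space.high
        self.obs_low = env.observation_space.low
        self.obs_bins = NUM_DISCRETE_BINS
        self.bin_width = (self.obs_high - self.obs_low) / self.obs_bins
        self.action_shape = env.action_space.n
        # Q-table indexed by (position bin, velocity bin, action)
        self.Q = np.zeros((self.obs_bins + 1, self.obs_bins + 1,
                           self.action_shape))
        self.alpha = ALPHA
        self.gamma = GAMMA
        self.epsilon = 1.0

    def discretize(self, obs):
        # Map a continuous observation to integer bin indices
        return tuple(((obs - self.obs_low) / self.bin_width).astype(int))

    def get_action(self, obs):
        discretized_obs = self.discretize(obs)
        # Epsilon-greedy action selection with a slowly decaying epsilon
        if self.epsilon > EPSILON_MIN:
            self.epsilon -= EPSILON_DECAY
        if np.random.random() > self.epsilon:
            return np.argmax(self.Q[discretized_obs])
        return np.random.choice(self.action_shape)

    def learn(self, obs, action, reward, next_obs):
        discretized_obs = self.discretize(obs)
        discretized_next_obs = self.discretize(next_obs)
        # One-step Q-learning update:
        # Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        td_target = reward + self.gamma * np.max(self.Q[discretized_next_obs])
        td_error = td_target - self.Q[discretized_obs][action]
        self.Q[discretized_obs][action] += self.alpha * td_error


def train(agent, env):
    best_reward = -float("inf")
    for episode in range(MAX_NUM_EPISODES):
        done = False
        obs = env.reset()
        total_reward = 0.0
        while not done:
            action = agent.get_action(obs)
            next_obs, reward, done, info = env.step(action)
            agent.learn(obs, action, reward, next_obs)
            obs = next_obs
            total_reward += reward
        best_reward = max(best_reward, total_reward)
        print("Episode#:{} reward:{} best_reward:{} eps:{}".format(
            episode, total_reward, best_reward, agent.epsilon))
    # Return the greedy policy learned over the discretized state space
    return np.argmax(agent.Q, axis=2)


def test(agent, env, policy):
    done = False
    obs = env.reset()
    total_reward = 0.0
    while not done:
        action = policy[agent.discretize(obs)]
        obs, reward, done, info = env.step(action)
        total_reward += reward
    return total_reward


if __name__ == "__main__":
    env = gym.make("MountainCar-v0")
    agent = Q_Learner(env)
    learned_policy = train(agent, env)
    # Record the trained agent's behaviour (Monitor is available in Gym < 0.20)
    env = gym.wrappers.Monitor(env, "./gym_monitor_output", force=True)
    for _ in range(10):
        test(agent, env, learned_policy)
    env.close()

If this sketch is run as-is, training prints the per-episode and best rewards while epsilon decays, and the recording step writes video files of the greedy policy to the (assumed) ./gym_monitor_output directory.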