# Defines states, actions and a transition model for a standard gridworld.
# The grid is 7 x 7 with three unreachable cells and one absorbing cell at
# s(4,4), which carries the label "Black Hole".
gw <- gridworld_init(
  dim = c(7, 7),
  unreachable_states = c("s(2,2)", "s(7,3)", "s(3,6)"),
  absorbing_states = "s(4,4)",
  labels = list("s(4,4)" = "Black Hole")
)

# inspect the components of gw that are reused further below
gw$states
gw$actions
gw$info

# display the gridworld as a matrix: default view, state labels,
# reachable cells and absorbing cells
gridworld_matrix(gw)
# spell the option out in full ("labels") instead of relying on partial
# matching of the abbreviated "label"
gridworld_matrix(gw, what = "labels")
gridworld_matrix(gw, what = "reachable")
gridworld_matrix(gw, what = "absorbing")

# a transition function for regular moves in the gridworld is provided;
# it is called as transition_prob(action, start.state, end.state)
gw$transition_prob("right", "s(1,1)", "s(1,2)")
gw$transition_prob("right", "s(2,1)", "s(2,2)") ### we cannot move into an unreachable state
gw$transition_prob("right", "s(2,1)", "s(2,1)") ### but the agent stays in place

# convert between state names and row/column indices
gridworld_s2rc("s(1,1)")
gridworld_rc2s(c(1, 1))
# The information in gw can be used to build a custom MDP.
# We modify the standard transition function so there is a 50% chance that
# you will get sucked into the black hole from the adjacent squares.
# Transition probability for the black-hole gridworld.
#
# From any of the eight squares surrounding the black hole at s(4,4) the
# agent is pulled into the black hole with probability 0.5; with the
# remaining probability 0.5 the standard gridworld move in gw is performed.
# Everywhere else the standard gridworld transition model is used unchanged.
#
# Arguments:
#   action      name of the action taken.
#   start.state name of the state the agent is in.
#   end.state   name of the state the agent may end up in.
# Returns the probability of reaching end.state from start.state when
# taking action.
trans_black_hole <- function(action = NA, start.state = NA, end.state = NA) {
  # probability of the regular gridworld move (taken from the global gw)
  p_standard <- gw$transition_prob(action, start.state, end.state)

  adjacent_to_hole <- c(
    "s(3,3)", "s(3,4)", "s(3,5)",
    "s(4,3)",           "s(4,5)",
    "s(5,3)", "s(5,4)", "s(5,5)"
  )

  # away from the black hole: use the standard gridworld movement
  if (!start.state %in% adjacent_to_hole) {
    return(p_standard)
  }

  if (end.state == "s(4,4)") {
    # 50% pull plus the share of the regular move that also ends in the
    # black hole; without the second term the probabilities for an action
    # that steers into the black hole would only sum to 0.5
    0.5 + 0.5 * p_standard
  } else {
    0.5 * p_standard
  }
}
# Assemble the custom MDP: states, actions and info are taken from gw, the
# transition model is the modified black-hole function, and the reward has
# two R_() rows: value 1 without a state restriction and value -100 for
# transitions with end state s(4,4).
black_hole <- MDP(
  states = gw$states,
  actions = gw$actions,
  transition_prob = trans_black_hole,
  reward = rbind(
    R_(value = 1),
    R_(end.state = "s(4,4)", value = -100)
  ),
  info = gw$info,
  name = "Black hole"
)

# print the model and draw its transition graph
black_hole
gridworld_plot_transition_graph(black_hole)

# solve the problem, then show the state values and the resulting policy
sol <- solve_MDP(black_hole)
gridworld_matrix(sol, what = "values")
gridworld_plot_policy(sol)
# the optimal policy is to fly around, but avoid the black hole.
# Build a Maze: The Dyna Maze from Chapter 8 in the RL book.
# The maze is a 6 x 9 grid with the start at s(3,1), the goal at s(1,9)
# and seven wall cells.
DynaMaze <- gridworld_maze_MDP(
  dim = c(6, 9),
  start = "s(3,1)",
  goal = "s(1,9)",
  walls = c(
    "s(2,3)", "s(3,3)", "s(4,3)",
    "s(5,6)",
    "s(1,8)", "s(2,8)", "s(3,8)"
  ),
  restart = TRUE,
  discount = 0.95,
  # no trailing comma after the last argument: it would pass an extra,
  # empty argument to the function
  name = "Dyna Maze"
)

# print the model, show the grid layout and labels, and draw the
# transition graph
DynaMaze
gridworld_matrix(DynaMaze)
gridworld_matrix(DynaMaze, what = "labels")
gridworld_plot_transition_graph(DynaMaze)
# Note that the problem resets if the goal state would be reached.

# solve the maze and inspect state values, chosen actions and the policy
sol <- solve_MDP(DynaMaze)
gridworld_matrix(sol, what = "values")
gridworld_matrix(sol, what = "actions")
gridworld_plot_policy(sol)
gridworld_plot_policy(sol, actions = "label", cex = 1, states = FALSE)

# visualize the first 3 iterations of value iteration
gridworld_animate(DynaMaze, method = "value", n = 3)
# Run the code above in your browser using DataLab