# Michael's Sleepy Tiger Problem is like the POMDP Tiger problem, but
# has completely observable states because the tiger is sleeping in front
# of the door. This makes the problem an MDP.
library("pomdp")

STiger <- MDP(
  name = "Michael's Sleepy Tiger Problem",
  discount = .9,
  states = c("tiger-left", "tiger-right"),
  actions = c("open-left", "open-right", "do-nothing"),
  start = "uniform",

  # opening a door resets the problem; the expanded matrices are
  # inspected below
  transition_prob = list(
    "open-left"  = "uniform",
    "open-right" = "uniform",
    "do-nothing" = "identity"),

  # the reward helper R_() expects: action, start.state, end.state,
  # observation, value (observation stays NA for an MDP)
  reward = rbind(
    R_("open-left",  "tiger-left",  v = -100),
    R_("open-left",  "tiger-right", v =   10),
    R_("open-right", "tiger-left",  v =   10),
    R_("open-right", "tiger-right", v = -100),
    R_("do-nothing",                v =    0)
  )
)
STiger
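
# Inspect how the "uniform" and "identity" shorthands were expanded:
# transition_matrix() and reward_matrix() are the pomdp package's
# accessors for the stored model. "uniform" should expand to equal
# transition probabilities and "identity" to a diagonal matrix (a quick
# check, assuming the accessors print the matrices as documented).
transition_matrix(STiger)
reward_matrix(STiger)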
sol <- solve_MDP(STiger)
sol
policy(sol)
plot_value_function(sol)
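
# A rough sanity check of the solved policy by simulation. This sketch
# assumes simulate_MDP() from the pomdp package with the signature
# simulate_MDP(model, n, horizon) and an avg_reward field in its
# result; adjust if your package version differs.
sim <- simulate_MDP(sol, n = 100, horizon = 100)
sim$avg_reward  # should be close to the value reported by the solver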
# convert the MDP into a POMDP and solve
STiger_POMDP <- make_partially_observable(STiger)
sol2 <- solve_POMDP(STiger_POMDP)
sol2
policy(sol2)
plot_value_function(sol2, ylim = c(80, 120))
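
# The POMDP solution can also be visualized as a policy graph using
# plot_policy_graph() from the pomdp package. Because the converted
# problem remains fully observable, the graph is expected to map each
# observed state directly to opening the opposite door.
plot_policy_graph(sol2)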