## Defining the Tiger Problem (it is also available via data(Tiger), see ?Tiger)
Tiger <- POMDP(
  name = "Tiger Problem",
  discount = 0.75,
  states = c("tiger-left", "tiger-right"),
  actions = c("listen", "open-left", "open-right"),
  observations = c("tiger-left", "tiger-right"),
  start = "uniform",
  # one entry per action: listening leaves the state unchanged, opening a
  # door makes both end states equally likely
  transition_prob = list(
    "listen" = "identity",
    "open-left" = "uniform",
    "open-right" = "uniform"
  ),
  # one entry per action: when listening, the observation matches the state
  # with probability 0.85; after opening a door the observation is uniform
  observation_prob = list(
    "listen" = rbind(
      c(0.85, 0.15),
      c(0.15, 0.85)
    ),
    "open-left" = "uniform",
    "open-right" = "uniform"
  ),
  # the reward helper expects: action, start.state, end.state, observation, value
  # missing arguments default to NA which matches any value (often denoted as * in POMDPs).
  # `value` is written out in full so the calls do not rely on partial
  # argument matching.
  reward = rbind(
    R_("listen", value = -1),
    R_("open-left", "tiger-left", value = -100),
    R_("open-left", "tiger-right", value = 10),
    R_("open-right", "tiger-left", value = 10),
    R_("open-right", "tiger-right", value = -100)
  )
)

# display the model
Tiger
### Defining the Tiger problem using functions
# Transition probability of moving from start.state to end.state under action.
#   action      name of the action taken
#   start.state name of the state before the action
#   end.state   name of the state after the action
# Returns a single probability.
trans_f <- function(action, start.state, end.state) {
  # opening either door: both end states are equally likely
  if (action != "listen") {
    return(1 / 2)
  }

  # listening does not move the tiger
  if (end.state == start.state) {
    1
  } else {
    0
  }
}
# Probability of seeing observation after action led to end.state.
#   action      name of the action taken
#   end.state   name of the state after the action
#   observation name of the observation received
# Returns a single probability.
obs_f <- function(action, end.state, observation) {
  # after opening a door every observation is equally likely
  if (action != "listen") {
    return(1 / 2)
  }

  # listening reports the matching observation with probability 0.85
  if (end.state == observation) {
    0.85
  } else {
    0.15
  }
}
# Reward for taking action in start.state.
#   action      name of the action taken
#   start.state name of the state before the action
#   end.state   accepted but not used
#   observation accepted but not used
# Returns a single reward value; stops with an error for any combination of
# action and start.state that is not listed below.
rew_f <- function(action, start.state, end.state, observation) {
  # listening always costs 1, regardless of the state
  if (action == "listen") {
    return(-1)
  }

  if (action == "open-left") {
    if (start.state == "tiger-left") return(-100)
    if (start.state == "tiger-right") return(10)
  } else if (action == "open-right") {
    if (start.state == "tiger-left") return(10)
    if (start.state == "tiger-right") return(-100)
  }

  # nothing matched: unknown action or unknown state
  stop("Not possible")
}
# The Tiger problem with transition, observation and reward models supplied
# as the functions trans_f, obs_f and rew_f.
Tiger_func <- POMDP(
  # problem dimensions
  states = c("tiger-left", "tiger-right"),
  actions = c("listen", "open-left", "open-right"),
  observations = c("tiger-left", "tiger-right"),
  # model components given as functions
  transition_prob = trans_f,
  observation_prob = obs_f,
  reward = rew_f,
  # remaining settings
  discount = 0.75,
  start = "uniform",
  name = "Tiger Problem"
)

# display the model
Tiger_func
# Defining a Time-dependent version of the Tiger Problem called Scared Tiger
# The tiger reacts normally for 3 epochs (he goes randomly to one
# of the two doors when a door is opened). After 3 epochs he gets
# scared, and when a door is opened he always goes to the other door.
# specify the horizon for each of the two different episodes
# start from a copy of the Tiger model and rename it
Tiger_time_dependent <- Tiger
Tiger_time_dependent$name <- "Scared Tiger Problem"

# two episodes of 3 epochs each; the episode names match the entries of
# transition_prob below
Tiger_time_dependent$horizon <- c(normal_tiger = 3, scared_tiger = 3)

Tiger_time_dependent$transition_prob <- list(
  # first episode: opening a door places the tiger uniformly at random
  normal_tiger = list(
    "listen" = "identity",
    "open-left" = "uniform",
    "open-right" = "uniform"
  ),
  # second episode: opening a door sends the tiger to the other door
  scared_tiger = list(
    "listen" = "identity",
    # both rows are (0, 1): all probability is on the second state
    "open-left" = matrix(
      c(0, 1,
        0, 1),
      nrow = 2, byrow = TRUE
    ),
    # both rows are (1, 0): all probability is on the first state
    "open-right" = matrix(
      c(1, 0,
        1, 0),
      nrow = 2, byrow = TRUE
    )
  )
)
# Run the code above in your browser using DataLab