library("pomdp")
data(Tiger)
# solve the POMDP for 5 epochs and no discounting
sol <- solve_POMDP(Tiger, horizon = 5, discount = 1, method = "enum")
sol
policy(sol)
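# uncomment the following line to visualize the policy as a graph
# (a sketch; assumes pomdp's plot_policy_graph() supports this solution)
# plot_policy_graph(sol)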
# uncomment the following line to register a parallel backend for simulation
# (needs the package doParallel installed)
# doParallel::registerDoParallel()
# foreach::getDoParWorkers()
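# the implicit cluster can be stopped again after the simulations
# doParallel::stopImplicitCluster()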
## Example 1: simulate 100 trajectories
sim <- simulate_POMDP(sol, n = 100, verbose = TRUE)
sim
# calculate the percentage that each action is used in the simulation
round_stochastic(sim$action_cnt / sum(sim$action_cnt), 2)
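# state and observation counts can be summarized the same way
# (a sketch; assumes simulate_POMDP() also returns state_cnt and obs_cnt)
# round_stochastic(sim$state_cnt / sum(sim$state_cnt), 2)
# round_stochastic(sim$obs_cnt / sum(sim$obs_cnt), 2)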
# reward distribution
hist(sim$reward)
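# the average simulated reward should be close to the expected reward of the
# policy (the second line is a sketch; it assumes pomdp's reward() accessor)
sim$avg_reward
# reward(sol)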
## Example 2: look at the visited belief states and the trajectories when
# starting from a given initial belief.
sim <- simulate_POMDP(sol, n = 100, belief = c(.5, .5),
return_beliefs = TRUE, return_trajectories = TRUE)
head(sim$belief_states)
head(sim$trajectories)
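# steps per simulated episode (a sketch; assumes the trajectories are returned
# as a data.frame with an episode column)
# table(sim$trajectories$episode)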
# plot with added density (the x-axis is the probability of the second state)
plot_belief_space(sol, sample = sim$belief_states, jitter = 2, ylim = c(0, 6))
lines(density(sim$belief_states[, 2], bw = .02)); axis(2); title(ylab = "Density")
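# a single belief update can also be computed directly (a sketch; assumes
# pomdp's update_belief() and the Tiger labels "listen" and "tiger-left")
# update_belief(sol, belief = c(.5, .5), action = "listen", observation = "tiger-left")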
## Example 3: simulate trajectories for the unsolved POMDP. Without a policy,
# an epsilon of 1 is used (i.e., all actions are chosen randomly). The
# simulation horizon for the infinite-horizon Tiger problem is determined
# using delta_horizon.
sim <- simulate_POMDP(Tiger, return_beliefs = TRUE, verbose = TRUE)
sim$avg_reward
hist(sim$reward, breaks = 20)
plot_belief_space(sol, sample = sim$belief_states, jitter = 2, ylim = c(0, 6))
lines(density(sim$belief_states[, 1], bw = .05)); axis(2); title(ylab = "Density")
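# the simulation horizon and the amount of random action selection can also be
# set explicitly (a sketch; assumes simulate_POMDP() accepts horizon and epsilon)
# sim <- simulate_POMDP(Tiger, n = 100, horizon = 100, epsilon = 1)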