set.seed(1)
# Inputs need not be sorted
d <- c(-5, 5, 0, 10, 12)
t <- c(-6, -4, 0, 2, 4, -2, 6)
# Index (in vector a) of closest match
idx <- matchindex(d, t, 1)
idx
# To check: show values of closest match
# Random draw among indices of the 5 closest predictors
matchindex(d, t)
# An example
train <- mtcars[1:20, ]
test <- mtcars[21:32, ]
fit <- lm(mpg ~ disp + cyl, data = train)
d <- fitted.values(fit)
t <- predict(fit, newdata = test) # note: not using mpg
idx <- matchindex(d, t)
# Borrow values from train to produce 12 synthetic values for mpg in test.
# Synthetic values are plausible values that could have been observed if
# they had been measured.
train$mpg[idx]
# Exercise: Create a distribution of 1000 plausible values for each of the
# twelve mpg entries in test, and count how many times the true value
# (which we know here) is located within the inter-quartile range of each
# distribution. Is your count anywhere close to 500? Why? Why not?
Run the code above in your browser using DataLab