# Build a 2000-row test dataset made of two labelled halves.

# "clean" half: 1000 points drawn uniformly at full precision from
# longitude [-43, -40] and latitude [-13, -10].
clean <- data.frame(
  dataset = "clean",
  decimalLongitude = runif(n = 1000, min = -43, max = -40),
  decimalLatitude = runif(n = 1000, min = -13, max = -10)
)

# "biased" half: 1000 points from longitude [-42, -40] and latitude
# [-12, -10], of which the first 500 are rounded to one decimal place, the
# next 300 to whole degrees, and the last 200 are left unrounded.
bias.long <- c(
  round(runif(n = 500, min = -42, max = -40), digits = 1),
  round(runif(n = 300, min = -42, max = -40), digits = 0),
  runif(n = 200, min = -42, max = -40)
)
bias.lat <- c(
  round(runif(n = 500, min = -12, max = -10), digits = 1),
  round(runif(n = 300, min = -12, max = -10), digits = 0),
  runif(n = 200, min = -12, max = -10)
)
bias <- data.frame(
  dataset = "biased",
  decimalLongitude = bias.long,
  decimalLatitude = bias.lat
)

# Stack both halves: rows 1-1000 are "clean", rows 1001-2000 are "biased".
test <- rbind(clean, bias)
# Example usage, deliberately disabled with `if (FALSE)` so that nothing in
# this block runs when the script is sourced. It calls `clean_dataset()`,
# which is not defined in this file, on the `test` data frame.
if (FALSE) {
  # Run clean_dataset
  flags <- clean_dataset(test)

  # Check problems.
  # NOTE(review): assumes `flags` has one row per dataset, with the dataset
  # names as row names and a logical `summary` column -- confirm against
  # clean_dataset().
  # `which()` drops NA entries in `summary`, so they select no row name.
  passed <- rownames(flags)[which(flags$summary)]
  failed <- rownames(flags)[which(!flags$summary)]

  # Membership is tested with `%in%` rather than `==`: `==` would recycle the
  # vector of names element-wise against the 2000 rows of `test`, which
  # selects the wrong rows as soon as more (or fewer) than one name matches.

  # Clean
  hist(test[test$dataset %in% passed, "decimalLongitude"])
  # Biased
  hist(test[test$dataset %in% failed, "decimalLongitude"])
}
# Run the code above in your browser using DataLab