# Attach the package that provides bernoulli_naive_bayes(), naive_bayes(),
# tables(), get_cond_dist() and the %class% / %prob% operators used below.
# (Was commented out, which makes the script fail when run standalone.)
library(naivebayes)
### Simulate the data:
# 100 observations of 10 binary (0/1) features, heavily skewed towards 0,
# and a two-class response. Seeded so the example is reproducible.
set.seed(1)
n_rows <- 100
n_cols <- 10
p01 <- c("0" = 0.9, "1" = 0.1)  # P(feature == 0) and P(feature == 1)
# NOTE: M is drawn before y, matching the original RNG call order,
# so the simulated data are bit-identical to the original script.
M <- matrix(
  sample(0:1, size = n_rows * n_cols, replace = TRUE, prob = p01),
  nrow = n_rows, ncol = n_cols
)
y <- factor(sample(paste0("class", LETTERS[1:2]), size = n_rows,
                   replace = TRUE, prob = c(0.3, 0.7)))
colnames(M) <- paste0("V", seq_len(ncol(M)))
laplace <- 0  # no additive (Laplace) smoothing
### Train the Bernoulli Naive Bayes on the dense 0/1 matrix
bnb <- bernoulli_naive_bayes(x = M, y = y, laplace = laplace)
summary(bnb)
# Classification (shorthand operator: bnb %class% M)
bnb_class <- predict(bnb, newdata = M, type = "class")
head(bnb_class)
# Posterior probabilities (shorthand operator: bnb %prob% M)
bnb_post <- predict(bnb, newdata = M, type = "prob")
head(bnb_post)
# Parameter estimates (class-conditional probabilities per feature)
coef(bnb)
### Sparse data: train the Bernoulli Naive Bayes
library(Matrix)
# Coerce the dense 0/1 matrix to a compressed sparse column matrix
M_sparse <- Matrix(M, sparse = TRUE)
class(M_sparse)  # "dgCMatrix"
# Fit the model directly on the sparse representation
bnb_sparse <- bernoulli_naive_bayes(M_sparse, y, laplace = laplace)
# Classification
sparse_class <- predict(bnb_sparse, newdata = M_sparse, type = "class")
head(sparse_class)
# Posterior probabilities
sparse_post <- predict(bnb_sparse, newdata = M_sparse, type = "prob")
head(sparse_post)
# Parameter estimates
coef(bnb_sparse)
### Equivalent calculation with the general naive_bayes() function.
### (no sparse data support by naive_bayes)
# Make sure that the columns are factors with the 0-1 levels, so that
# naive_bayes() models each column as a Bernoulli distribution.
df <- as.data.frame(lapply(as.data.frame(M), factor, levels = c(0, 1)))
# sapply(df, class)
nb <- naive_bayes(df, y, laplace = laplace)
summary(nb)
# Posterior probabilities.
# newdata is passed explicitly (consistent with the predict calls above;
# avoids the implicit fall-back to the training data stored in the model).
head(predict(nb, newdata = df, type = "prob"))
# Obtain probability tables
tables(nb, which = "V1")
tables(bnb, which = "V1")
# Visualise class conditional Bernoulli distributions
# (arguments named consistently in both calls)
plot(nb, which = "V1", prob = "conditional")
plot(bnb, which = "V1", prob = "conditional")
# Check the equivalence of the class conditional distributions
all(get_cond_dist(nb) == get_cond_dist(bnb))
# (RDocumentation page footer, not code:) Run the code above in your browser using DataLab