# NOT RUN {
# load entropy library
library("entropy")
## One discrete random variable ----

# Observed counts in each of the six classes
y <- c(4, 2, 3, 1, 6, 4)
n <- sum(y) # total sample size: 20

# Expected class frequencies (sum to 1) and corresponding expected counts
freqs.expected <- c(0.10, 0.15, 0.35, 0.05, 0.20, 0.15)
y.expected <- n * freqs.expected

# G statistic (likelihood-ratio goodness-of-fit statistic, with p-value)
Gstat(y, freqs.expected) # from expected frequencies
Gstat(y, y.expected)     # alternatively from expected counts

# The same G statistic computed from the empirical KL divergence:
# G = 2 * n * KL(observed || expected)
2 * n * KL.empirical(y, y.expected)

## Pearson chi-squared statistic (with p-value)
# This can be viewed as an approximation of the G statistic.
chi2stat(y, freqs.expected) # from expected frequencies
chi2stat(y, y.expected)     # alternatively from expected counts

# Computed from the empirical chi-squared divergence:
# X^2 = n * chi2(observed || expected)
n * chi2.empirical(y, y.expected)

# Compare with the built-in goodness-of-fit test from the stats package
chisq.test(y, p = freqs.expected)
## Joint distribution of two discrete random variables ----

# 3x2 contingency table of observed counts
y.mat <- matrix(c(4, 5, 1, 2, 4, 4), ncol = 2)
n.mat <- sum(y.mat) # total sample size: 20

# G statistic comparing the empirical joint distribution with the
# product of its marginals (likelihood-ratio test of independence)
Gstatindep(y.mat)

# The same statistic computed from the empirical mutual information:
# G = 2 * n * MI(observed)
2 * n.mat * mi.empirical(y.mat)

# Pearson chi-squared statistic of independence
chi2statindep(y.mat)

# Computed from the empirical chi-squared divergence of independence
n.mat * chi2indep.empirical(y.mat)

# Compare with the built-in chi-squared test of independence
chisq.test(y.mat)
# }
# (RDocumentation footer) Run the code above in your browser using DataLab.