# HTP2 data: the observation 28 is considered as an outlier
data("HTP2")
outliers <- c(28)
boxplot(HTP2, horizontal = TRUE)
# Outlier detection using ICS
library(ICS)
if (FALSE) {
out <- ICS_outlier(HTP2, ICS_algorithm = "QR",
method = "norm_test",
test = "agostino.test", level_test = 0.05,
level_dist = 0.01, n_dist = 50)
# Here there is a singularity issue. One solution is to first reduce the
# dimension. To ensure higher numerical stability of the subsequent methods
# we suggest to permute the data and to use the QR decomposition instead of
# the regular SVD decomposition.
Xt <- HTP2
# Normalization by the mean
Xt.c <- sweep(HTP2, 2, colMeans(HTP2), "-")
# Permutation by rows
# decreasing by infinity norm: absolute maximum
norm_inf <- apply(Xt.c, 1, function(x) max(abs(x)))
order_rows <- order(norm_inf, decreasing = TRUE)
Xt_row_per <- Xt.c[order_rows,]
# QR decomposition of Xt with column pivoting from LAPACK
qr_Xt <- qr(1/sqrt(nrow(Xt.c)-1)*Xt_row_per, LAPACK = TRUE)
# Estimation of rank q
# R is nxp, but with only zero for rows > p
# the diag of R is already in decreasing order and is a good approximation
# of the rank of X.c. To decide on which singular values are zero we use
# a relative criteria based on previous values.
# R should be pxp
R <- qr.R(qr_Xt)
r_all <- abs(diag(R))
r_ratios <- r_all[2:length(r_all)]/r_all[1:(length(r_all)-1)]
q <- which(r_ratios < max(dim(Xt.c)) *.Machine$double.eps)[1]
q <- ifelse(is.na(q), length(r_all), q)
# Q should be nxp but we are only interested in nxq
Q1 <- qr.Q(qr_Xt)[,1:q]
# QR decomposition of Rt
R_q <- R[1:q, ]
qr_R <- qr(t(R_q), LAPACK = TRUE)
Tau <- qr.Q(qr_R)[1:q, ]
Omega1 <- qr.R(qr_R)[1:q, 1:q]
# New X tilde
# permutation matrices
# permutation of rows
Pi2 <- data.frame(model.matrix(~ . -1, data = data.frame(row=as.character(order_rows))))
Pi2 <- Pi2[,order(as.numeric(substr(colnames(Pi2), start = 4, stop = nchar(colnames(Pi2)))))]
colnames(Pi2) <- rownames(Xt)
# permutation of cols
Pi3 <- data.frame(model.matrix(~ . -1, data = data.frame(col=as.character( qr_R$pivot))))
Pi3 <- t(Pi3[,order(as.numeric(substr(colnames(Pi3), start = 4, stop = nchar(colnames(Pi3)))))])
X_tilde <- sqrt(nrow(Xt)-1)* Tau %*% t(Pi3) %*% t(Q1)
Xt_tilde <- t(Pi2) %*% t(X_tilde)
# Run ICS_outlier
out <- ICS_outlier(Xt_tilde, ICS_algorithm = "QR",
method = "norm_test",
test = "agostino.test", level_test = 0.01,
level_dist = 0.01, n_dist = 50)
summary(out)
plot(out)
text(outliers, out$ics_distances[outliers], outliers, pos = 2, cex = 0.9, col = 2)
}
Run the code above in your browser using DataLab