# NOT RUN {
# corSparse stays reasonably fast (though not instant!) on sparse input,
# as long as the resulting matrix has no more than about 1e8 cells.
# Be aware that both the computation and its result are memory-hungry.
X <- rSparseMatrix(1e4, 1e4, 1e5)
system.time({
  M <- corSparse(X)
})
print(object.size(M), units = "auto") # more than 750 Mb
# The bulk of the values is small, so discarding them
# is often a sensible way to keep the result sparse
M <- drop0(M, tol = 0.4)
print(object.size(M), units = "auto") # normally reduces size by half or more
length(M@x) / length(M) # down to less than 0.05% non-zero entries
# }
# NOT RUN {
# How corSparse compares with the alternatives:
# it is much faster than cor from the stats package,
# but cosSparse is quicker still than either of them!
X <- rSparseMatrix(1e3, 1e3, 1e4)
X2 <- as.matrix(X)
# should a warning show up, simply retry with a freshly drawn random X
system.time({
  McorRegular <- cor(X2)
})
system.time({
  McorSparse <- corSparse(X)
})
system.time({
  McosSparse <- cosSparse(X)
})
# cor and corSparse agree with each other
all.equal(McorSparse, McorRegular)
# corSparse and cosSparse do differ, but only slightly
# (densify and strip the dimnames first so the two are comparable)
McosSparse <- unname(as.matrix(McosSparse))
all.equal(McorSparse, McosSparse)
# In fact, cosSparse and corSparse are *almost* identical!
cor(as.dist(McorSparse), as.dist(McosSparse))
# Plotted against each other they look completely identical
# Note: drawing that plot takes some time
# }
# NOT RUN {
# Scatter plot of the entries of McorSparse against those of McosSparse,
# each converted with as.dist(); both matrices must already exist
# in the workspace before this line is run.
plot(as.dist(McorSparse), as.dist(McosSparse))
# }
# NOT RUN {
# So: consider using cosSparse instead of cor or corSparse.
# With sparse matrices, this gives mostly the same results,
# but much larger matrices are possible,
# and the computations are quicker and the results sparser
# }
# Run the code above in your browser using DataLab