# Fix the RNG seed so the simulated data generated below is reproducible.
set.seed(123)
# require(gRbase) # optional: for faster computations in the internal functions
# Attach 'hash' with library() rather than require(): library() stops the
# script immediately when the package is missing, whereas require() only
# returns FALSE with a warning and lets the script run on.
library(hash)
# Simulate a dataset of continuous data: 1000 rows x 300 columns, every cell
# drawn from Uniform(1, 100). One vectorised runif() call fills the matrix
# (column by column) instead of 300,000 single-value draws through apply().
dataset <- matrix(runif(1000 * 300, 1, 100), nrow = 1000, ncol = 300)

# Define a simulated target variable: a linear combination of columns 10, 200
# and 20 plus Uniform(0, 1) noise. The noise is drawn once per row; a single
# runif(1, ...) draw would be recycled and merely shift every value by the
# same constant.
target <- 3 * dataset[, 10] + 2 * dataset[, 200] + 3 * dataset[, 20] +
  runif(nrow(dataset), 0, 1)

# Define some simulated equivalences (these run after `target` is computed,
# so they do not affect it).
# Column 15 is a copy of column 10 ...
dataset[, 15] <- dataset[, 10]
# ... then the first row of column 10 is nudged by 0.2, so columns 10 and 15
# differ in exactly one entry (presumably to make them near-equivalent rather
# than identical -- confirm intent).
dataset[1, 10] <- dataset[1, 10] + 0.2
# Columns 250 and 230 are exact copies of column 200.
dataset[, 250] <- dataset[, 200]
dataset[, 230] <- dataset[, 200]
# Attach 'hash' quietly. library() aborts here if the package is missing,
# instead of require()'s FALSE + warning.
library("hash", quietly = TRUE)

# NOTE(review): SES() is not defined or loaded anywhere in this script;
# presumably it comes from a package that is already attached -- confirm.
#
# The statements below are plain top-level statements rather than one `{ }`
# block: inside a braced block only the value of the last expression is
# auto-printed, so the slot look-ups would produce no output. The slots are
# additionally wrapped in print() so they are shown even under source().

# Run the SES algorithm with hashing enabled and no pre-existing hash object.
sesObject <- SES(target, dataset, max_k = 5, threshold = 0.2,
                 test = "testIndFisher", hash = TRUE, hashObject = NULL)

# Print a summary of the SES output.
summary(sesObject)

# Plot the SES output.
plot(sesObject, mode = "all")

# Show the queues with the equivalences for each selected variable.
print(sesObject@queues)

# Show the generated signatures.
print(sesObject@signatures)

# Get the run time (sample output from the original example):
# > sesObject@runtime
#    user  system elapsed
#    0.35    0.00    0.35

# Re-run the SES algorithm with the same or a different configuration, passing
# in the hash object from the first run so the hash-based implementation can
# retrieve statistics instead of starting from an empty hash.
# Important: this must be done on the SAME dataset.
hashObj <- sesObject@hashObject
sesObject2 <- SES(target, dataset, max_k = 2, threshold = 0.01,
                  test = "testIndFisher", hash = TRUE, hashObject = hashObj)

# Retrieve the results (summary, plot, sesObject2@...).
summary(sesObject2)

# Get the run time (sample output from the original example):
# > sesObject2@runtime
#    user  system elapsed
#    0.01    0.00    0.01
# Run the code above in your browser using DataLab