# Basic dissimilarity plots for the iris data.
# NOTE(review): dissplot() is not defined or attached in this script;
# presumably it comes from the seriation package, which must be loaded
# first -- confirm.
data("iris")

# Shuffle the rows and drop column 5 (Species) so only the numeric
# measurements remain. sample() is random, so the order differs between runs.
x_iris <- iris[sample(seq_len(nrow(iris))), -5]
d <- dist(x_iris)

# Plot original matrix
dissplot(d, method = NA)

# Plot reordered matrix using the nearest insertion algorithm (from tsp)
dissplot(d, method = "TSP", main = "Seriation (TSP)")

# Cluster iris with k-means and 3 clusters and reorder the dissimilarity
# matrix. kmeans() picks random starting centers, so the cluster labels can
# differ between runs.
l <- kmeans(x_iris, centers = 3)$cluster
dissplot(d, labels = l, main = "k-means")

# Show only distances as lower triangle
dissplot(d, labels = l, main = "k-means", lower_tri = TRUE, upper_tri = FALSE)
# Use a grid layout to place several plots on a page.
# A 2 x 2 layout viewport is pushed once; each plot then gets its own cell
# viewport, which is popped again after drawing.
library("grid")
grid.newpage()
pushViewport(viewport(
  layout = grid.layout(nrow = 2, ncol = 2),
  gp = gpar(fontsize = 8)
))

# Cell (1, 1)
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1))
# Visualize the clustering (using Spectral between clusters and MDS within).
# newpage = FALSE keeps the drawing inside the current viewport.
res <- dissplot(d, l,
  method = list(inter = "Spectral", intra = "MDS"),
  main = "K-Means + Seriation", newpage = FALSE
)
popViewport()

# Cell (1, 2)
pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 2))
# More visualization options. Note that we reuse the reordered object res!
# color: use 10 shades red-blue, biased towards small distances
plot(res,
  main = "K-Means + Seriation (red-blue + biased)",
  col = bluered(10, bias = .5), newpage = FALSE
)
popViewport()

# Cell (2, 1)
pushViewport(viewport(layout.pos.row = 2, layout.pos.col = 1))
# Threshold (using zlim) and cubic scale to highlight differences
plot(res,
  main = "K-Means + Seriation (cubic + threshold)",
  zlim = c(0, 2), col = grays(100, power = 3), newpage = FALSE
)
popViewport()

# Cell (2, 2)
pushViewport(viewport(layout.pos.row = 2, layout.pos.col = 2))
# Use gray scale with logistic transformation: 100 gray levels obtained by
# passing an evenly spaced sequence from the largest to the smallest
# reordered dissimilarity through the logistic CDF.
plot(res,
  main = "K-Means + Seriation (logistic scale)",
  col = gray(
    plogis(
      seq(max(res$x_reordered), min(res$x_reordered), length.out = 100),
      location = 2, scale = 1 / 2, log.p = FALSE
    )
  ),
  newpage = FALSE
)
# Pop the cell viewport and the enclosing layout viewport.
popViewport(2)

# The reordered_cluster_dissimilarity_matrix object
res
names(res)
## --------------------------------------------------------------------
## ggplot-based dissplot
# ggplot2 is optional: requireNamespace() only tests whether it is installed,
# library() then attaches it.
if (requireNamespace("ggplot2", quietly = TRUE)) {
  library("ggplot2")

  # ggplot objects are auto-printed only at top level. Inside this block
  # every plot is therefore wrapped in print(); otherwise only the value of
  # the last expression would be drawn.

  # Plot original matrix
  print(ggdissplot(d, method = NA))

  # Plot seriated matrix
  print(
    ggdissplot(d, method = "TSP") +
      labs(title = "Seriation (TSP)")
  )

  # Cluster iris with k-means and 3 clusters
  l <- kmeans(x_iris, centers = 3)$cluster
  print(
    ggdissplot(d, labels = l) +
      labs(title = "K-means + Seriation")
  )

  # Show only lower triangle
  print(
    ggdissplot(d, labels = l, lower_tri = TRUE, upper_tri = FALSE) +
      labs(title = "K-means + Seriation")
  )

  # No lines or cluster labels and add a label for the color key (fill)
  print(
    ggdissplot(d, labels = l, cluster_lines = FALSE, cluster_labels = FALSE) +
      labs(title = "K-means + Seriation", fill = "Distances\n(Euclidean)")
  )

  # Diverging color palette with manually set midpoint and different
  # seriation methods
  print(
    ggdissplot(d, l, method = list(inter = "Spectral", intra = "MDS")) +
      labs(title = "K-Means + Seriation", subtitle = "biased color scale") +
      scale_fill_gradient2(midpoint = median(d))
  )

  # Manipulate the color scale using package scales
  library("scales")

  # Threshold (using limits and na.value) and cubic scale to highlight
  # differences
  cubic_dist_trans <- trans_new(
    name = "cubic",
    # note that we have to do the inverse transformation for distances
    transform = function(x) x^(1 / 3),
    inverse = function(x) x^3
  )

  print(
    ggdissplot(d, l, method = list(inter = "Spectral", intra = "MDS")) +
      labs(
        title = "K-Means + Seriation",
        subtitle = "cubic + biased color scale"
      ) +
      scale_fill_gradient(
        low = "black", high = "white",
        limits = c(0, 2), na.value = "white",
        trans = cubic_dist_trans
      )
  )

  # Use gray scale with logistic transformation
  logis_2_.5_dist_trans <- trans_new(
    name = "Logistic transform (location, scale)",
    # note that we have to do the inverse transformation for distances
    transform = function(x) plogis(x, location = 2, scale = .5, log.p = FALSE),
    inverse = function(x) qlogis(x, location = 2, scale = .5, log.p = FALSE)
  )

  print(
    ggdissplot(d, l, method = list(inter = "Spectral", intra = "MDS")) +
      labs(title = "K-Means + Seriation", subtitle = "logistic color scale") +
      scale_fill_gradient(
        low = "black", high = "white",
        trans = logis_2_.5_dist_trans,
        breaks = c(0, 1, 2, 3, 4)
      )
  )
}
# Run the code above in your browser using DataLab