## First example: Artificial map data from Legendre & Legendre
## (2012, Fig. 13.26): n = 16
dat <- c(41,42,25,38,50,30,41,43,43,41,30,50,38,25,42,41)
coord.dat <- matrix(c(1,3,5,7,2,4,6,8,1,3,5,7,2,4,6,8,
4.4,4.4,4.4,4.4,3.3,3.3,3.3,3.3,
2.2,2.2,2.2,2.2,1.1,1.1,1.1,1.1),16,2)
## Obtaining a list of neighbours:
library(spdep)
listW <- nb2listw(tri2nb(coord.dat), style="B")
links.mat.dat <- listw2mat(listW)
neighbors <- listw2sn(listW)[,1:2]
## Calculating the (Euclidean) distance between points:
D.dat <- dist(dat)
## Display the points:
plot(coord.dat, type='n',asp=1)
title("Delaunay triangulation")
text(coord.dat, labels=as.character(as.matrix(dat)), pos=3)
for(i in 1:nrow(neighbors))
lines(rbind(coord.dat[neighbors[i,1],],
coord.dat[neighbors[i,2],]))
## Unconstrained clustring by hclust:
grpWD2_hclust <- hclust(D.dat, method="ward.D2")
plot(grpWD2_hclust, hang=-1)
## Clustering without a contiguity constraint;
## the result is represented as a dendrogram:
grpWD2_constr_hclust <- constr.hclust(D.dat, method="ward.D2")
plot(grpWD2_constr_hclust, hang=-1)
## Clustering with a contiguity constraint described by a list of
## links:
grpWD2cst_constr_hclust <-
constr.hclust(
D.dat, method="ward.D2",
neighbors, coord.dat)
## To visualize using hclust's plotting method:
## stats:::plot.hclust(grpWD2cst_constr_hclust, hang=-1)
## Plot the results on a map with k=3 clusters:
plot(grpWD2cst_constr_hclust, k=3, links=TRUE, las=1, xlab="Eastings",
ylab="Northings", cex=3, lwd=3)
## Generic functions from hclust can be used, for instance to obtain
## a list of members of each cluster:
cutree(grpWD2cst_constr_hclust, k=3)
## Now with k=5 clusters:
plot(grpWD2cst_constr_hclust, k=5, links=TRUE, las=1, xlab="Eastings",
ylab="Northings", cex=3, lwd=3)
cutree(grpWD2cst_constr_hclust, k=5)
## End of the artificial map example
## Second example: Scotch Whiskey distilleries clustered using tasting
## scores (nose, body, palate, finish, and the four distances combined)
## constrained with respect to the distillery locations in Scotland.
## Documentation file about the Scotch Whiskey data: ?ScotchWhiskey
data(ScotchWhiskey)
## Cluster analyses for the nose, body, palate, and finish D
## matrices:
grpWD2cst_ScotchWhiskey <-
lapply(
ScotchWhiskey$dist, ## A list of distance matrices
constr.hclust, ## The function called by function lapply
links=ScotchWhiskey$neighbors@data, ## The list of links
coords=ScotchWhiskey$geo@coords/1000
)
## The four D matrices (nose, body, palate, finish), represented as
## vectors in the ScotchWiskey data file, are combined as follows to
## produce a single distance matrix integrating all four types of
## tastes:
Dmat <- ScotchWhiskey$dist
ScotchWhiskey[["norm"]] <-
sqrt(Dmat$nose^2 + Dmat$body^2 + Dmat$palate^2 + Dmat$finish^2)
## This example shows how to apply const.clust to a single D matrix when
## the data file contains several matrices.
grpWD2cst_ScotchWhiskey[["norm"]] <-
constr.hclust(
d=ScotchWhiskey[["norm"]],method="ward.D2",
ScotchWhiskey$neighbors@data,
coords=ScotchWhiskey$geo@coords/1000
)
## A fonction to plot the Whiskey clustering results:
plotWhiskey <- function(wh, k) {
par(fig=c(0,1,0,1))
plot(grpWD2cst_ScotchWhiskey[[wh]], k=k, links=TRUE, las=1,
xlab="Eastings (km)", ylab="Northings (km)", cex=0.1, lwd=3,
main=sprintf("Feature: %s",wh))
text(ScotchWhiskey$geo@coords/1000,labels=1:length(ScotchWhiskey$geo))
legend(x=375, y=700, lty=1L, lwd=3, col=rainbow(1.2*k)[1L:k],
legend=sprintf("Group %d",1:k), cex=1.25)
SpeyZoom <- list(xlim=c(314.7,342.2), ylim=c(834.3,860.0))
rect(xleft=SpeyZoom$xlim[1L], ybottom=SpeyZoom$ylim[1L],col="#E6E6E680",
xright=SpeyZoom$xlim[2L], ytop=SpeyZoom$ylim[2L], lwd=2, lty=1L)
par(fig=c(0.01,0.50,0.46,0.99), new=TRUE)
plot(grpWD2cst_ScotchWhiskey[[wh]], xlim=SpeyZoom$xlim,
ylim=SpeyZoom$ylim, k=k, links=TRUE, las=1, xlab="", ylab="",
cex=0.1, lwd=3, axes=FALSE)
text(ScotchWhiskey$geo@coords/1000,labels=1:length(ScotchWhiskey$geo))
rect(xleft=SpeyZoom$xlim[1L], ybottom=SpeyZoom$ylim[1L],
xright=SpeyZoom$xlim[2L], ytop=SpeyZoom$ylim[2L], lwd=2, lty=1L)
}
## Plot the clustering results on the map of Scotland for 5 groups.
## The inset map shows the Speyside distilleries in detail:
plotWhiskey("nose", 5L)
plotWhiskey("body", 5L)
plotWhiskey("palate", 5L)
plotWhiskey("finish", 5L)
plotWhiskey("norm", 5L)
## End of the Scotch Whiskey tasting data example
if (FALSE) {
## Third example: Fish community composition along the Doubs River,
## France. The sequence is analyzed as a case of chronological
## clustering, substituting space for time.
if(require("ade4", quietly = TRUE)){
data(doubs, package="ade4")
Doubs.D <- dist.ldc(doubs$fish, method="hellinger")
grpWD2cst_fish <- constr.hclust(Doubs.D, method="ward.D2", chron=TRUE,
coords=as.matrix(doubs$xy))
plot(grpWD2cst_fish, k=5, las=1, xlab="Eastings (km)",
ylab="Northings (km)", cex=3, lwd=3)
## Repeat the plot with other values of k (number of groups)
## End of the Doubs River fish assemblages example
## Example with 6 connected points, shown in Fig. 2 of Guénard & Legendre paper
var = c(1.5, 0.2, 5.1, 3.0, 2.1, 1.4)
ex.Y = data.frame(var)
## Site coordinates, matrix xy
x.coo = c(-1, -2, -0.5, 0.5, 2, 1)
y.coo = c(-2, -1, 0, 0, 1, 2)
ex.xy = data.frame(x.coo, y.coo)
## Matrix of connecting edges E
from = c(1,1,2,3,4,3,4)
to = c(2,3,3,4,5,6,6)
ex.E = data.frame(from, to)
## Carry out constrained clustering analysis
test.out <-
constr.hclust(
dist(ex.Y), # Response dissimilarity matrix
method="ward.D2", # Clustering method
links=ex.E, # File of link edges (constraint) E
coords=ex.xy # File of geographic coordinates
)
par(mfrow=c(1,2))
## Plot the map of the results for k = 3
plot(test.out, k=3)
## Plot the dendrogram
stats:::plot.hclust(test.out, hang=-1)
}
## Same example modified: disjoint clusters
## Same ex.Y and ex.xy as in the previous example
var = c(1.5, 0.2, 5.1, 3.0, 2.1, 1.4)
ex.Y = data.frame(var)
## Site coordinates, matrix xy
x.coo = c(-1, -2, -0.5, 0.5, 2, 1)
y.coo = c(-2, -1, 0, 0, 1, 2)
ex.xy = data.frame(x.coo, y.coo)
## Matrix of connecting edges E2
from = c(1,1,2,4,4)
to = c(2,3,3,5,6)
ex.E2 = data.frame(from, to)
## Carry out constrained clustering analysis
test.out2 <-
constr.hclust(
dist(ex.Y), # Response dissimilarity matrix
method="ward.D2", # Clustering method
links=ex.E2, # File of link edges (constraint) E
coords=ex.xy # File of geographic coordinates
)
cutree(test.out2, k=2)
par(mfrow=c(1,2))
## Plot the map of the results for k = 3
plot(test.out2, k=3)
## Plot the dendrogram showing the disconnected groups
stats:::plot.hclust(test.out2, hang=-1)
axis(2,at=0:ceiling(max(test.out2$height,na.rm=TRUE)))
## End of the disjoint clusters example
}
## End of examples
Run the code above in your browser using DataLab