# Das Moordatenset sveg laden
library("readr")
<- read_delim("datasets/stat5-8/dave_sveg.csv")
sveg
# PCA und CA rechnen
library("vegan")
<- rda(sveg^0.25, scale = TRUE)
pca <- cca(sveg^0.5)
ca
# k-means-Clustering mit 4 Gruppen durchführen
.1 <- kmeans(sveg, 4) kmeans
Stat8: Demo
- Download dieses Demoscript via “</>Code” (oben rechts)
- Datensatz Doubs.RData
- Datensatz dave_sveg.csv von Wildi (2017)
- Funktion drawmap.R drawmap.R
- Funktion hcoplot.R hcoplot.R
k-means clustering
# Clustering-Resultat in Ordinationsplots darstellen
plot(ca, type = "n")
points(ca, display = "sites", pch=19, col = kmeans.1[[1]])
# k-means Clustering mit 3 Clusters
.2 <- kmeans(sveg, 3)
kmeansplot(pca, type = "n")
points(pca, display = "sites", pch=19, col = kmeans.2[[1]])
# mit 3. Achse darstellen
plot(pca, choices = c(1, 3), type = "n")
points(pca, choices = c(1, 3), display = "sites", pch = 19, col=kmeans.2[[1]])
# k-means partitioning, 2 to 10 groups
<- cascadeKM(sveg, inf.gr = 2, sup.gr = 10, iter = 100, criterion = "ssi")
KM.cascade summary(KM.cascade)
Length Class Mode
partition 567 -none- numeric
results 18 -none- numeric
criterion 1 -none- character
size 90 -none- numeric
$results KM.cascade
2 groups 3 groups 4 groups 5 groups 6 groups 7 groups
SSE 1840.13571 1629.4399038 1488.2961538 1378.3369048 1286.5005411 1214.3219697
ssi 0.26103 0.2716237 0.3630755 0.3101818 0.3866348 0.4499705
8 groups 9 groups 10 groups
SSE 1156.7314935 1101.5523810 1053.1476190
ssi 0.3995404 0.5187903 0.4732336
$partition KM.cascade
2 groups 3 groups 4 groups 5 groups 6 groups 7 groups 8 groups 9 groups
1 1 3 4 4 6 7 3 1
2 1 3 3 2 1 3 2 9
3 1 3 4 4 6 7 3 1
4 1 3 4 4 6 7 3 1
5 1 3 3 2 1 3 2 9
6 1 3 4 4 1 3 2 9
7 1 3 4 4 6 7 3 1
8 1 3 4 4 6 7 3 1
9 1 3 4 4 6 7 3 1
10 1 3 3 2 5 1 5 5
11 1 3 4 4 6 7 3 1
12 1 3 3 2 1 3 2 9
13 1 3 3 2 5 1 5 6
14 1 3 3 2 1 3 2 9
15 1 3 3 2 1 3 2 9
16 1 3 3 2 1 3 2 9
17 1 3 3 2 5 1 5 5
18 2 2 2 3 4 4 8 7
19 1 3 3 2 1 3 2 9
20 1 3 3 2 5 1 5 5
21 1 3 4 4 6 7 3 5
22 1 3 3 2 1 3 2 9
23 2 2 2 3 4 4 8 7
24 1 3 4 4 6 7 3 5
25 1 3 4 4 6 7 3 5
26 2 1 1 3 4 4 8 7
27 1 3 3 2 5 1 5 6
28 1 3 3 2 5 1 5 6
29 1 3 3 2 5 1 5 6
30 1 3 3 2 5 1 5 5
31 2 2 2 1 3 6 7 4
32 1 3 3 2 5 1 5 6
33 1 3 3 2 5 1 5 5
34 1 2 2 1 3 6 7 4
35 2 2 2 1 3 6 7 4
36 1 3 3 2 5 1 5 6
37 1 3 3 2 5 1 5 6
38 2 2 2 3 4 4 8 7
39 2 1 1 5 2 5 6 3
40 1 3 4 4 6 7 3 1
41 2 1 1 5 2 5 1 8
42 2 1 1 5 2 5 6 3
43 2 1 1 5 2 5 1 8
44 1 2 2 1 3 6 7 4
45 2 2 2 3 4 4 8 7
46 2 2 2 1 3 6 7 4
47 1 2 2 1 3 6 7 6
48 2 1 1 5 2 5 1 8
49 2 2 2 1 3 6 7 4
50 2 2 2 1 3 6 7 4
51 2 1 1 3 4 2 4 2
52 2 1 1 5 2 5 1 8
53 2 1 1 5 2 5 4 2
54 2 1 1 3 4 2 4 2
55 2 1 1 3 4 2 4 2
56 2 1 1 5 2 5 1 8
57 2 1 1 3 4 2 4 2
58 2 1 1 3 4 2 4 2
59 2 1 1 3 4 4 8 7
60 2 1 1 3 4 4 8 7
61 2 2 2 3 4 4 8 7
62 2 1 1 5 2 5 6 3
63 2 1 1 3 4 2 4 2
10 groups
1 2
2 6
3 2
4 2
5 6
6 6
7 2
8 2
9 2
10 8
11 7
12 6
13 5
14 6
15 6
16 6
17 8
18 7
19 6
20 8
21 8
22 6
23 4
24 8
25 8
26 4
27 5
28 5
29 5
30 8
31 10
32 5
33 8
34 10
35 10
36 5
37 5
38 7
39 3
40 2
41 1
42 3
43 1
44 10
45 4
46 10
47 5
48 1
49 10
50 10
51 9
52 1
53 9
54 9
55 9
56 1
57 9
58 9
59 4
60 4
61 4
62 3
63 9
# k-means visualisation
plot(KM.cascade, sortg = TRUE)
Agglomarative Clusteranalyse
mit Daten und Skripten aus Borcard u. a. (2011)
# Daten laden
load("datasets/stat5-8/Doubs.RData")
# Remove empty site 8
<- spe[-8, ]
spe <- env[-8, ]
env <- spa[-8, ]
spa <- latlong[-8, ] latlong
Dendogramme berechnen und ploten
## Hierarchical agglomerative clustering of the species abundance
library("cluster")
# Compute matrix of chord distance among sites
<- decostand(spe, "normalize")
spe.norm <- vegdist(spe.norm, "euc")
spe.ch
# Attach site names to object of class 'dist'
attr(spe.ch, "Labels") <- rownames(spe)
par(mfrow = c(1, 1))
# Compute single linkage agglomerative clustering
<- hclust(spe.ch, method = "single")
spe.ch.single # Plot a dendrogram using the default options
plot(spe.ch.single, labels = rownames(spe), main = "Chord - Single linkage")
# Compute complete-linkage agglomerative clustering
<- hclust(spe.ch, method = "complete")
spe.ch.complete plot(spe.ch.complete, labels = rownames(spe), main = "Chord - Complete linkage")
# Compute UPGMA agglomerative clustering
<- hclust(spe.ch, method = "average")
spe.ch.UPGMA plot(spe.ch.UPGMA, labels = rownames(spe), main = "Chord - UPGMA")
# Compute centroid clustering
<- hclust(spe.ch, method = "centroid")
spe.ch.centroid plot(spe.ch.centroid, labels = rownames(spe), main = "Chord - Centroid")
# Compute Ward's minimum variance clustering
<-hclust(spe.ch, method = "ward.D2")
spe.ch.ward plot(spe.ch.ward, labels = rownames(spe), main = "Chord - Ward")
# Compute beta-flexible clustering using cluster::agnes()
# beta = -0.1
<- agnes(spe.ch, method = "flexible", par.method = 0.55)
spe.ch.beta1 # beta = -0.25
<- agnes(spe.ch, method = "flexible", par.method = 0.625)
spe.ch.beta2 # beta = -0.5
<- agnes(spe.ch, method = "flexible", par.method = 0.75)
spe.ch.beta3 # Change the class of agnes objects
class(spe.ch.beta1)
[1] "agnes" "twins"
<- as.hclust(spe.ch.beta1)
spe.ch.beta1 class(spe.ch.beta1)
[1] "hclust"
<- as.hclust(spe.ch.beta2)
spe.ch.beta2 <- as.hclust(spe.ch.beta3)
spe.ch.beta3
par(mfrow = c(2, 2))
plot(spe.ch.beta1, labels = rownames(spe), main = "Chord - Beta-flexible (beta=-0.1)")
plot(spe.ch.beta2, labels = rownames(spe), main = "Chord - Beta-flexible (beta=-0.25)")
plot(spe.ch.beta3, labels = rownames(spe), main = "Chord - Beta-flexible (beta=-0.5)")
# Compute Ward's minimum variance clustering
<- hclust(spe.ch, method = "ward.D2")
spe.ch.ward plot(spe.ch.ward, labels = rownames(spe), main = "Chord - Ward")
Cophenetic correlations
# Single linkage clustering
<- cophenetic(spe.ch.single)
spe.ch.single.coph cor(spe.ch, spe.ch.single.coph)
[1] 0.599193
# Complete linkage clustering
<- cophenetic(spe.ch.complete)
spe.ch.comp.coph cor(spe.ch, spe.ch.comp.coph)
[1] 0.7655628
# Average clustering
<- cophenetic(spe.ch.UPGMA)
spe.ch.UPGMA.coph cor(spe.ch, spe.ch.UPGMA.coph)
[1] 0.8608326
# Ward clustering
<- cophenetic(spe.ch.ward)
spe.ch.ward.coph cor(spe.ch, spe.ch.ward.coph)
[1] 0.7998516
# Shepard-like diagrams
par(mfrow = c(2, 2))
plot(spe.ch, spe.ch.single.coph,
xlab = "Chord distance", ylab = "Cophenetic distance",
asp = 1, xlim = c(0, sqrt(2)), ylim = c(0, sqrt(2)),
main = c("Single linkage", paste("Cophenetic correlation =",
round(cor(spe.ch, spe.ch.single.coph), 3))))
abline(0, 1)
lines(lowess(spe.ch, spe.ch.single.coph), col = "red")
plot(spe.ch, spe.ch.comp.coph,
xlab = "Chord distance", ylab = "Cophenetic distance",
asp = 1, xlim = c(0, sqrt(2)), ylim = c(0, sqrt(2)),
main = c("Complete linkage", paste("Cophenetic correlation =",
round(cor(spe.ch, spe.ch.comp.coph), 3))))
abline(0, 1)
lines(lowess(spe.ch, spe.ch.comp.coph), col = "red")
plot(spe.ch, spe.ch.UPGMA.coph,
xlab = "Chord distance", ylab = "Cophenetic distance",
asp = 1, xlim = c(0, sqrt(2)), ylim = c(0, sqrt(2)),
main = c("UPGMA", paste("Cophenetic correlation =",
round( cor(spe.ch, spe.ch.UPGMA.coph), 3))))
abline(0, 1)
lines(lowess(spe.ch, spe.ch.UPGMA.coph), col = "red")
plot(spe.ch, spe.ch.ward.coph,
xlab = "Chord distance", ylab = "Cophenetic distance",
asp = 1, xlim = c(0, sqrt(2)), ylim = c(0, max(spe.ch.ward$height)),
main = c("Ward", paste("Cophenetic correlation =",
round(cor(spe.ch, spe.ch.ward.coph), 3))))
abline(0, 1)
lines(lowess(spe.ch, spe.ch.ward.coph), col = "red")
Optimale Anzahl Cluster
library("labdsv")
## Select a dendrogram (Ward/chord) and apply three criteria
## to choose the optimal number of clusters
# Choose and rename the dendrogram ("hclust" object)
<- spe.ch.ward
hc # hc <- spe.ch.beta2
# hc <- spe.ch.complete
par(mfrow = c(1, 2))
# Average silhouette widths (Rousseeuw quality index)
<- numeric(nrow(spe))
Si for (k in 2:(nrow(spe) - 1))
{<- silhouette(cutree(hc, k = k), spe.ch)
sil <- summary(sil)$avg.width
Si[k]
}
<- which.max(Si)
k.best plot(1:nrow(spe), Si, type = "h",
main = "Silhouette-optimal number of clusters",
xlab = "k (number of clusters)", ylab = "Average silhouette width")
axis(1, k.best,paste("optimum", k.best, sep = "\n"), col = "red",
font = 2, col.axis = "red")
points(k.best,max(Si), pch = 16, col = "red",cex = 1.5)
# Optimal number of clusters according to matrix correlation
# statistic (Pearson)
# Homemade function grpdist from Borcard et al. (2018)
<- function(X)
grpdist
{require(cluster)
<- as.data.frame(as.factor(X))
veg <- daisy(veg, "gower")
distgr
distgr
}
<- data.frame(k = 1:nrow(spe), r = 0)
kt for (i in 2:(nrow(spe) - 1))
{<- cutree(hc, i)
gr <- grpdist(gr)
distgr <- cor(spe.ch, distgr, method = "pearson")
mt 2] <- mt
kt[i,
}
<- which.max(kt$r)
k.best plot(kt$k,kt$r, type = "h",
main = "Matrix correlation-optimal number of clusters",
xlab = "k (number of clusters)", ylab = "Pearson's correlation")
axis(1, k.best, paste("optimum", k.best, sep = "\n"),
col = "red", font = 2, col.axis = "red")
points(k.best, max(kt$r), pch = 16, col = "red", cex = 1.5)
# Optimal number of clusters according as per indicator species
# analysis (IndVal, Dufrene-Legendre; package: labdsv)
<- numeric(nrow(spe))
IndVal <- numeric(nrow(spe))
ng for (k in 2:(nrow(spe) - 1))
{<- indval(spe, cutree(hc, k = k), numitr = 1000)
iva <- factor(iva$maxcls[iva$pval <= 0.05])
gr <- length(levels(gr)) / k
ng[k] <- iva$indcls[iva$pval <= 0.05]
iv <- sum(iv)
IndVal[k]
}
<- which.max(IndVal[ng == 1]) + 1
k.best <- rep(1, nrow(spe))
col3 == 1] <- 3
col3[ng
par(mfrow = c(1, 2))
plot(1:nrow(spe), IndVal, type = "h",
main = "IndVal-optimal number of clusters",
xlab = "k (number of clusters)", ylab = "IndVal sum", col = col3)
axis(1,k.best,paste("optimum", k.best, sep = "\n"),
col = "red", font = 2, col.axis = "red")
points(which.max(IndVal),max(IndVal),pch = 16,col = "red",cex = 1.5)
text(28, 15.7, "a", cex = 1.8)
plot(1:nrow(spe),ng,
type = "h",
xlab = "k (number of clusters)",
ylab = "Ratio",
main = "Proportion of clusters with significant indicator species",
col = col3)
axis(1,k.best,paste("optimum", k.best, sep = "\n"),
col = "red", font = 2, col.axis = "red")
points(k.best,max(ng), pch = 16, col = "red", cex = 1.5)
text(28, 0.98, "b", cex = 1.8)
Final dendrogram with the selected clusters
# Choose the number of clusters
<- 4
k # Silhouette plot of the final partition
<- cutree(spe.ch.ward, k = k)
spech.ward.g <- silhouette(spech.ward.g, spe.ch)
sil rownames(sil) <- row.names(spe)
plot(sil, main = "Silhouette plot - Chord - Ward", cex.names = 0.8, col = 2:(k + 1), nmax = 100)
# Reorder clusters
library("gclus")
<- reorder.hclust(spe.ch.ward, spe.ch)
spe.chwo
# Plot reordered dendrogram with group labels
par(mfrow = c(1, 1))
plot(spe.chwo, hang = -1, xlab = "4 groups", ylab = "Height", sub = "",
main = "Chord - Ward (reordered)", labels = cutree(spe.chwo, k = k))
rect.hclust(spe.chwo, k = k)
# Plot the final dendrogram with group colors (RGBCMY...)
# Fast method using the additional hcoplot() function:
# Usage:
# hcoplot(tree = hclust.object,
# diss = dissimilarity.matrix,
# lab = object labels (default NULL),
# k = nb.clusters,
# title = paste("Reordered dendrogram from",deparse(tree$call),
# sep="\n"))
source("stat5-8/hcoplot.R")
hcoplot(spe.ch.ward, spe.ch, lab = rownames(spe), k = 4)
# Plot the Ward clusters on a map of the Doubs River
# (see Chapter 2)
source("stat5-8/drawmap.R")
drawmap(xy = spa, clusters = spech.ward.g, main = "Four Ward clusters along the Doubs River")
Miscellaneous graphical outputs
# konvertieren von "hclust" Objekt in ein Dendogram Objekt
<- as.dendrogram(spe.ch.ward)
dend
# Heat map of the dissimilarity matrix ordered with the dendrogram
heatmap(as.matrix(spe.ch), Rowv = dend, symm = TRUE, margin = c(3, 3))
# Ordered community table
# Species are ordered by their weighted averages on site scores.
# Dots represent absences.
library("vegan")
<- vegemite(spe, spe.chwo) or
32222222222 111111 1111
09876210543959876506473221341
Icme 5432121......................
Abbr 54332431.....1...............
Blbj 54542432.1...1...............
Anan 54432222.....111.............
Gyce 5555443212...11..............
Scer 522112221...21...............
Cyca 53421321.....1111............
Rham 55432333.....221.............
Legi 35432322.1...1111............
Alal 55555555352..322.............
Chna 12111322.1...211.............
Titi 53453444...1321111.21........
Ruru 55554555121455221..1.........
Albi 53111123.....2341............
Baba 35342544.....23322.........1.
Eslu 453423321...41111..12.1....1.
Gogo 5544355421..242122111......1.
Pefl 54211432....41321..12........
Pato 2211.222.....3344............
Sqce 3443242312152132232211..11.1.
Lele 332213221...52235321.1.......
Babl .1111112...32534554555534124.
Teso .1...........11254........23.
Phph .1....11...13334344454544455.
Cogo ..............1123......2123.
Satr .1..........2.123413455553553
Thth .1............11.2......2134.
29 sites, 27 species