##################################### #Load libraries ##################################### library(cluster) library(BiodiversityR) library(flashClust) library(clusterCrit) ##################################### #Load Data ##################################### # morphological measurements of two species of sunfish, standardized by standart length # each measure is therefore a proportion of individual length morphology<-read.csv(file="lepomis.csv", row.names=1, sep=",", header=TRUE) # This factor wil label the two species. The first 57 are bluegill, the next 86 are longear species<-c(rep("Bluegill",57),rep("Longear",86)) ##################################### #Run UPGMA cluster analysis with hclust ##################################### # Since we have standardized orphologic data, euclidean distance makes sense distance<-daisy(morphology, metric='euclidean', stand=TRUE) # Use hclust function for clustering, other methods "ward", "single", "complete", "centroid", "median" UPGMA<-hclust(distance, method="average") # faster alternative is flashClust UPGMA<-flashClust(distance, method="average") UPGMA # agglomerative coefficient coef.hclust(UPGMA) # Two plots produced, you will have to click to get each plot(UPGMA) # Get the tree distance between all points tree_dist<-cophenetic(UPGMA) # Plot the tree distance vs. the original euclidean distance plot(tree_dist, distance) # Cophenetic correlation between those two matrices, measure of the quality of the tree (1.0=perfect tree) cor(tree_dist, distance) #cut tree into 2, 3, 4 or 5 groups, no labels in agnes objects... cut_tree<-cutree(UPGMA,k=2:5) # Look at group memberships at the different cut points cut_tree # How many groups is best? Using the Calinski Harabasz index, K=3 group_membership<-cutree(UPGMA,2) intCriteria(as.matrix(morphology),group_membership,"Calinski_Harabasz") group_membership<-cutree(UPGMA,3) intCriteria(as.matrix(morphology),group_membership,"Calinski_Harabasz") group_membership<-cutree(UPGMA,4) intCriteria(as.matrix(morphology),group_membership,"Calinski_Harabasz") group_membership<-cutree(UPGMA,5) intCriteria(as.matrix(morphology),group_membership,"Calinski_Harabasz") group_membership<-cutree(UPGMA,6) intCriteria(as.matrix(morphology),group_membership,"Calinski_Harabasz") ##################################### #Run cluster analysis (single linkage agglomerative) using hclust ##################################### # All the same as above but with single linkage single<-hclust(distance, method="single") single plot(single) coef.hclust(single) tree_dist<-cophenetic(single) plot(tree_dist, distance) cor(tree_dist, distance) cut_tree<-cutree(single,k=2:5) cut_tree ##################################### #Run UPGMC cluster analysis ##################################### UPGMC<-hclust(distance, method="centroid") UPGMC plot(UPGMC) tree_dist<-cophenetic(UPGMC) plot(tree_dist, distance) cor(tree_dist, distance) ##################################### #Run divisive cluster with diana ##################################### distance<-daisy(morphology, metric='euclidean', stand=TRUE) div_cluster<-diana(distance, diss=TRUE) summary(div_cluster) plot(div_cluster) tree_dist<-cophenetic(div_cluster) plot(tree_dist, distance) cor(tree_dist, distance) ##################################### #Using the table function to check classification ##################################### distance<-daisy(morphology, metric='euclidean', stand=TRUE) UPGMA<-hclust(distance, method="average") plot(UPGMA) # cutting at 3 based on the criteria above cut_tree<-cutree(UPGMA,k=3) # Table function will give a classification table table(species,cut_tree)