##################################### #Load libraries ##################################### library(vegan) ##################################### #Load Data ##################################### community<-read.csv(file="example1.csv", row.names='Sites', sep=",", header=TRUE) community2<-read.csv(file="example2.csv", row.names='Sites', sep=",", header=TRUE) #################################### # Plot the raw data (each species abundance across the sample gradient) to see the structure of the data #################################### plot(seq(1:29),community[,1],xlab="Sample",ylab="Abundance",type="n", main="community 1") sp_colors<-rainbow(10) for(s in 1:10) { points(seq(1:29),community[,s],pch=19,cex=0.75,col=sp_colors[s],type="b") } plot(seq(1:29),community2[,1],xlab="Sample",ylab="Abundance",type="n", main="community 2") sp_colors<-rainbow(10) for(s in 1:10) { points(seq(1:29),community2[,s],pch=19,cex=0.75,col=sp_colors[s],type="b") } ##################################### # principal Coordinates Analysis ##################################### bray_distance<-vegdist(community, method="bray") prin_coord<-cmdscale(bray_distance, k=3, eig=TRUE) prin_coord # Function ordiplot produces a basic ordination plot # Note - you will get a warning about species scores not being available. That # is OK, recall that PCoA is a distance based ordination so the species information # is lost in the first step. The function ordiplot tries to plot samples and # species in combined ordination space, something you can't do (easily) with PCoA ordiplot(prin_coord,type="text") # Get the Euclidean distance among all samples in ordination space # If the analysis worked, this should be very similar ordination_distance<-vegdist(prin_coord$points,method="euclidean") plot(ordination_distance,bray_distance) model<-lm(bray_distance~ordination_distance) abline(model,col="red") # Total amount of variation explained by the full ordination (k=3) 100*cor(ordination_distance,bray_distance)^2 # Estimate relative importance of each axis prin_coord$eig/sum(prin_coord$eig) # Estimate % variation accounted for on each axis pct_var<-0 for(k in 1:3) { pct_var[k]<-100*cor(vegdist(prin_coord$points[,k],method="euclidean"),bray_distance)^2 } pct_var # Recall that the purpose here is to construct biologically meaningful gradients to represent the species data. # Look at the correlation between axis 1 and each species cor(prin_coord$points[,1],community) # values with the largest magnitude (negative or positive) are most tightly linked with that axis # species 1 and 9, we can plot those plot(prin_coord$points[,1],community$sp1, xlab="axis 1", ylab="Species1", pch=19, col=sp_colors[1]) plot(prin_coord$points[,1],community$sp9, xlab="axis 1", ylab="Species9", pch=19, col=sp_colors[9]) # species 5 had the lowest value plot(prin_coord$points[,1],community$sp5, xlab="axis 1", ylab="Species5", pch=19, col=sp_colors[5]) # What about axis 2? cor(prin_coord$points[,2],community) # that is where species 5 variability is explained plot(prin_coord$points[,2],community$sp5, xlab="axis 2", ylab="Species5", pch=19, col=sp_colors[5]) # How many dimensions (k) should we use? # Look at the Goodness of Fitt for a range of k values, want to maximize GOF GOF_values <- numeric(10) # run a loop, specifying a different k at each iteration, saving the GOF value for each K for (k in 2:11) { prin_coord<-cmdscale(bray_distance, k, eig=TRUE) GOF_values[k]<- prin_coord$GOF[2] } # Plot the GOF values by k, this is a Scree Plot plot(GOF_values) # What appears to be the best K? # Customizing plot colors # Let's say our samples are in groups, how do we plot with different symbols or colors? # Set up our groups (this will often be part of your dataset), this is just an example sample_groups<-factor(c(rep("A",10),rep("B",10),rep("C",9))) # Basic plot axis 1 vs. 2 with all black circles (pch=19 is a filled circle) plot(prin_coord$points[,1:2],pch=19,xlab="Axis 1",ylab="Axis 2") # Let plot groups A, B and C # you need a vector of colors, the same number of groups that you have # we have three groups (A, B, and C, and need three colors) # If you have lots of groups, there are ways to get automatic colors, rainbow(10) will give 10 distinct colors # here are three rainbow colors rainbow(3) # you can name colors, there are 656 predefined colors() # this would pick three random colors from that list sample(colors(),3,replace=F) # Here, we manually name three colors sample_colors<-c("red","green","blue") # Here, we can plot points by colors for each factor # you specify col=vector_of_colors[factor] ... make sure you have a factor plot(prin_coord$points[,1:2],pch=19,xlab="Axis 1",ylab="Axis 2",col=sample_colors[sample_groups]) #What about symbols? There are 25 predefined symbols plot(seq(1:25),rep(1,25),pch=seq(1:25),axes=F,xlab="",ylab="",cex=1.5,ylim=c(0,1.5)) text(seq(1:25),rep(0.9,25),labels = seq(1:25)) # Pick three symbols sample_symbols<-c(2, 15, 18) # plot sample with different symbols plot(prin_coord$points[,1:2],pch=sample_symbols[sample_groups],xlab="Axis 1",ylab="Axis 2",col="black") # combine colors and symbols plot(prin_coord$points[,1:2],pch=sample_symbols[sample_groups],xlab="Axis 1",ylab="Axis 2",col=sample_colors[sample_groups]) # Add a legend, legend("topright",legend=c("A","B","C"),pch=sample_symbols,col=sample_colors) # Do the exact same analysis with the second sample dataset - community2 bray_distance2<-vegdist(community2, method="bray") prin_coord2<-cmdscale(bray_distance2, k=3, eig=TRUE) prin_coord2 ordiplot(prin_coord2, type="text") ordination_distance2<-vegdist(prin_coord2$points,method="euclidean") plot(ordination_distance2,bray_distance2) model<-lm(bray_distance2~ordination_distance2) abline(model,col="red") # Total amount of variation explained by the full ordination 100*cor(ordination_distance2,bray_distance2)^2 # Estimate relative importance of each axis 100*(prin_coord2$eig/sum(prin_coord2$eig)) # Estimate % variation accounted for on each axis pct_var<-0 for(k in 1:3) { pct_var[k]<-100*cor(vegdist(prin_coord2$points[,k],method="euclidean"),bray_distance2)^2 } pct_var cor(prin_coord2$points[,1],community2$sp10)^2 plot(prin_coord2$points[,1:2],pch=16,cex=(community2$sp3+0.5))