#####################################
#Load libraries
#####################################
library(vegan)

#####################################
#Load Data
#####################################

community<-read.csv(file="example1.csv", row.names='Sites', sep=",", header=TRUE)
community2<-read.csv(file="example2.csv", row.names='Sites', sep=",", header=TRUE)

####################################
# Plot the raw data (each species abundance across the sample gradient) to see the structure of the data
####################################
plot(seq(1:29),community[,1],xlab="Sample",ylab="Abundance",type="n", main="community 1")
sp_colors<-rainbow(10)
for(s in 1:10)
{
  points(seq(1:29),community[,s],pch=19,cex=0.75,col=sp_colors[s],type="b")
}

plot(seq(1:29),community2[,1],xlab="Sample",ylab="Abundance",type="n", main="community 2")
sp_colors<-rainbow(10)
for(s in 1:10)
{
  points(seq(1:29),community2[,s],pch=19,cex=0.75,col=sp_colors[s],type="b")
}


#####################################
# principal Coordinates Analysis
#####################################

bray_distance<-vegdist(community, method="bray")
prin_coord<-cmdscale(bray_distance, k=3, eig=TRUE)
prin_coord

# Function ordiplot produces a basic ordination plot
# Note - you will get a warning about species scores not being available. That
# is OK, recall that PCoA is a distance based ordination so the species information
# is lost in the first step. The function ordiplot tries to plot samples and 
# species in combined ordination space, something you can't do (easily) with PCoA
ordiplot(prin_coord,type="text")

# Get the Euclidean distance among all samples in ordination space
# If the analysis worked, this should be very similar 
ordination_distance<-vegdist(prin_coord$points,method="euclidean")
plot(ordination_distance,bray_distance)
model<-lm(bray_distance~ordination_distance)
abline(model,col="red")

# Total amount of variation explained by the full ordination (k=3)
100*cor(ordination_distance,bray_distance)^2

# Estimate relative importance of each axis
prin_coord$eig/sum(prin_coord$eig)

# Estimate % variation accounted for on each axis
pct_var<-0
for(k in 1:3) 
	{
	pct_var[k]<-100*cor(vegdist(prin_coord$points[,k],method="euclidean"),bray_distance)^2
	}
pct_var

# Recall that the purpose here is to construct biologically meaningful gradients to represent the species data.

# Look at the correlation between axis 1 and each species
cor(prin_coord$points[,1],community)

# values with the largest magnitude (negative or positive) are most tightly linked with that axis
# species 1 and 9, we can plot those
plot(prin_coord$points[,1],community$sp1, xlab="axis 1", ylab="Species1", pch=19, col=sp_colors[1])
plot(prin_coord$points[,1],community$sp9, xlab="axis 1", ylab="Species9",  pch=19, col=sp_colors[9])
# species 5 had the lowest value
plot(prin_coord$points[,1],community$sp5, xlab="axis 1", ylab="Species5",  pch=19, col=sp_colors[5])

# What about axis 2?
cor(prin_coord$points[,2],community)

# that is where species 5 variability is explained
plot(prin_coord$points[,2],community$sp5, xlab="axis 2", ylab="Species5",  pch=19, col=sp_colors[5])

# How many dimensions (k) should we use? 
# Look at the Goodness of Fitt for a range of k values, want to maximize GOF
GOF_values <- numeric(10)
# run a loop, specifying a different k at each iteration, saving the GOF value for each K
for (k in 2:11) {
prin_coord<-cmdscale(bray_distance, k, eig=TRUE)
GOF_values[k]<- prin_coord$GOF[2]
}
# Plot the GOF values by k, this is a Scree Plot
plot(GOF_values)

# What appears to be the best K?

# Customizing plot colors 
# Let's say our samples are in groups, how do we plot with different symbols or colors?
# Set up our groups (this will often be part of your dataset), this is just an example
sample_groups<-factor(c(rep("A",10),rep("B",10),rep("C",9)))
# Basic plot axis 1 vs. 2 with all black circles (pch=19 is a filled circle)
plot(prin_coord$points[,1:2],pch=19,xlab="Axis 1",ylab="Axis 2")
# Let plot groups A, B and C
# you need a vector of colors, the same number of groups that you have 
# we have three groups (A, B, and C, and need three colors)
# If you have lots of groups, there are ways to get automatic colors, rainbow(10) will give 10 distinct colors
# here are three rainbow colors
rainbow(3)
# you can name colors, there are 656 predefined
colors()
# this would pick three random colors from that list 
sample(colors(),3,replace=F)
# Here, we manually name three colors
sample_colors<-c("red","green","blue")
# Here, we can plot points by colors for each factor
# you specify col=vector_of_colors[factor] ... make sure you have a factor
plot(prin_coord$points[,1:2],pch=19,xlab="Axis 1",ylab="Axis 2",col=sample_colors[sample_groups])

#What about symbols? There are 25 predefined symbols
plot(seq(1:25),rep(1,25),pch=seq(1:25),axes=F,xlab="",ylab="",cex=1.5,ylim=c(0,1.5))
text(seq(1:25),rep(0.9,25),labels = seq(1:25))

# Pick three symbols
sample_symbols<-c(2, 15, 18)
# plot sample with different symbols
plot(prin_coord$points[,1:2],pch=sample_symbols[sample_groups],xlab="Axis 1",ylab="Axis 2",col="black")

# combine colors and symbols
plot(prin_coord$points[,1:2],pch=sample_symbols[sample_groups],xlab="Axis 1",ylab="Axis 2",col=sample_colors[sample_groups])

# Add a legend, 
legend("topright",legend=c("A","B","C"),pch=sample_symbols,col=sample_colors)


# Do the exact same analysis with the second sample dataset - community2
bray_distance2<-vegdist(community2, method="bray")
prin_coord2<-cmdscale(bray_distance2, k=3, eig=TRUE)
prin_coord2

ordiplot(prin_coord2, type="text")

ordination_distance2<-vegdist(prin_coord2$points,method="euclidean")
plot(ordination_distance2,bray_distance2)
model<-lm(bray_distance2~ordination_distance2)
abline(model,col="red")

# Total amount of variation explained by the full ordination
100*cor(ordination_distance2,bray_distance2)^2

# Estimate relative importance of each axis
100*(prin_coord2$eig/sum(prin_coord2$eig))


# Estimate % variation accounted for on each axis
pct_var<-0
for(k in 1:3) 
	{
	pct_var[k]<-100*cor(vegdist(prin_coord2$points[,k],method="euclidean"),bray_distance2)^2
	}
pct_var

cor(prin_coord2$points[,1],community2$sp10)^2
plot(prin_coord2$points[,1:2],pch=16,cex=(community2$sp3+0.5))