#####################################
#Load libraries
#####################################
library(BiodiversityR)
library(MASS)
library(vegan)

#####################################
#Load Data
#####################################
# Load species data
community<-read.csv(file="example.csv", row.names='Sites', sep=",", header=TRUE)

# Load environmental data
envdata<-read.csv(file="example_env.csv", row.names='Sites', sep=",", header=TRUE)

# Run a CA with no environemtal constraints
ca<-cca(community)
summary(ca)
ordiplot(ca,type="text")

# Run a CCA with all environmental variables (this is usually a bad idea!)
# The period in the model (~ .) tells the analysis to include all variables in the dataset.
base_cca<-cca(community ~ .,envdata)
# The total inertia is still the same, now some portion of it is constrained by
# the environmental matrix
summary(base_cca)
# Look at the variance inflation factors, some indications there is still redundancy in the model.
vif.cca(base_cca)

# Perform an overal permutive ANOVA of the CCA
anova(base_cca)
# Test the significance of each variable in the model
anova(base_cca, by = "terms", perm = 1000)
# Test the significance of each axis
anova(base_cca, by = "axis", perm = 1000)
# triplot 
ordiplot(base_cca,type="t")

# We now need to reduce the model to only include the minimum variables.
# First, look at at correlation matrix to see if any variables are highlt correlated
cor_matrix<-cor(envdata[,2:13])
cor_matrix
# if you want to write the correlation matrix to a file and look at it
write.csv(cor_matrix,"correlations.csv")

# Define a reduced and full models for AIC model selection
reduced_cca<-cca(community ~ 1, envdata)
full_cca<-cca(community ~ spatial + env1 + env2 + env3 + env4 + env5 + env6 + env7 + env8 + env9 + env10 + env11 + env12 ,envdata)
# use the stepAIC function to select a model
model_fit <- ordistep(reduced_cca, scope=formula(full_cca))

# This final model has two variables, compare the constrained intertial to the full model with all variables
summary(model_fit)
vif.cca(model_fit)
# Now we have a simpler model that is explaining just as much variation

# Perform an overal permutive ANOVA of the CCA
anova(model_fit)
anova(model_fit, by = "terms", perm = 1000)
anova(model_fit, by = "axis", perm = 1000)
# triplot 
ordiplot(model_fit,type="t")

# Try a partial CCA where we eliminate the influce of the spatial variable
partial_cca<-cca(community ~ env7 + Condition(spatial), envdata)
summary(partial_cca)
vif.cca(partial_cca)
# Now we have a simpler model that is explaining just as much variation

# Perform an overal permutive ANOVA of the CCA
anova(partial_cca)
anova(partial_cca, by = "terms", perm = 1000)

# triplot 
ordiplot(partial_cca,type="t")

# RDA using the same variables
rda_model<-rda(community ~ spatial + env7, envdata)

summary(rda_model)

vif.cca(rda_model)

anova(rda_model)
anova(rda_model, by = "terms", perm = 1000)
anova(rda_model, by = "axis", perm = 1000)

# triplot 
ordiplot(rda_model,type="t")

# NMDS with weighted averages of environemtnal variables added
nmds<-metaMDS(community,k=2,expand = T)
plot(nmds$points,pch=16,xlab="NMDS 1",ylab="NMDS 2",ylim=c(-1.5,1.5),xlim=c(-4,4.5))
text(nmds$species,labels=names(community),col="red",cex=0.75)
env_wa<-wascores(nmds$points,envdata[,2:13],expand = T)
arrows(0,0,env_wa[,1],env_wa[,2],col="blue",length = 0.2)
text(env_wa,labels = names(envdata[,2:13]),col="blue",pch=0.75)