######################################
#Load libraries
#####################################
library(vegan)
library(BiodiversityR)

# Read the site-by-species sample data from "sample.csv".
# The "site" column supplies the row names; the table is kept in `community`.
community <- read.csv(
  "sample.csv",
  header = TRUE,
  row.names = "site"
)

#####################################
# Inspect the matrix and some of its properties
#####################################
# typing an object's name on its own prints it
community
# number of rows, then number of columns
dim(community)
# grand total over every cell
sum(community)

# names of the columns and of the rows
colnames(community)
rownames(community)

# Arithmetic operators are applied to every cell of the table
# increase every value by 1
community + 1

# raise every value to the power of 2
community^2

# Logical operators
# TRUE/FALSE per site: is the abundance of Species5 greater than 20?
community$Species5 > 20

# Selecting portions of a matrix
# general form: matrix[row, column]

# one cell: row 4, column 6
community[4, 6]

# an entire row (leaving the column slot empty selects every column)
community[4, ]

# an entire column (leaving the row slot empty selects every row)
community[, 6]

# a rectangular range: rows 3 to 4, columns 4 to 8
community[3:4, 4:8]

# rows and columns can also be picked by name
community[, "Species3"]

community["B_08", ]


# transpose: swap rows and columns
t(community)

# log(x + 1) transform of the community data
# the transformed matrix is stored as community_log
community_log <- log(community + 1)

# The BiodiversityR package has a function disttransform with a number of
# other transformation methods - "hellinger", "chord", "profiles",
# "chi.square", "log", "square" or "pa"
# ("profiles" is percent occurrence)
# Here we use decostand from the vegan package instead.
# standardize by margin total (MARGIN = 1 for rows, 2 for columns)
community_total <- decostand(community, method = "total", MARGIN = 1)
# recode to a 0/1 (presence/absence) matrix
community_pa <- decostand(community, method = "pa")

# Sorting and ordering
# Species5 abundances in their original site (row) order
community$Species5

# sort() returns the values themselves, smallest first
sort(community$Species5)
# largest first; TRUE is spelled out because T is an ordinary variable that
# can be reassigned, whereas TRUE is a reserved word
sort(community$Species5, decreasing = TRUE)
# order() returns the positions that would put the values in increasing order
order(community$Species5)
# positions for decreasing order
order(community$Species5, decreasing = TRUE)
# use those positions as a row index to sort the whole community matrix
# by Species5 abundance
community[order(community$Species5), ]


# apply() runs a function over one margin of the table:
# MARGIN = 2 works column by column, MARGIN = 1 row by row
colmeans <- apply(X = community, MARGIN = 2, FUN = mean)
rowmeans <- apply(X = community, MARGIN = 1, FUN = mean)


# print the original table followed by each transformed version
community
community_log
community_total
community_pa

# Combine apply() with community_pa to count species per site, or the number
# of occurrences of each species

# row sums of the 0/1 table: number of species at each site
apply(X = community_pa, MARGIN = 1, FUN = sum)

# column sums of the 0/1 table: number of occurrences of each species
apply(X = community_pa, MARGIN = 2, FUN = sum)

# swapping sum for mean gives the same quantities as proportions
apply(X = community_pa, MARGIN = 1, FUN = mean)

apply(X = community_pa, MARGIN = 2, FUN = mean)

# Some ways to deal with rare species

# Put some of these pieces together to drop rare species or low abundance
# sites from a matrix

# drop the three sites with the lowest abundance
row_abundance <- apply(community, 1, sum)
# third-lowest site total (kept for reference; not used for the filtering)
cutoff <- sort(row_abundance, decreasing = FALSE)[3]
# order() yields exactly three row positions even when site totals are tied;
# comparing against `cutoff` would also drop every site tied with the
# third-lowest total. head() copes with tables of fewer than three sites.
lowest_sites <- head(order(row_abundance), 3)
keep_site <- !(seq_along(row_abundance) %in% lowest_sites)
community_dropsites <- community[keep_site, , drop = FALSE]

# calculate total abundance of each species
sp_abundance <- apply(community, 2, sum)
sp_abundance
# eliminate species with a total abundance <10
# (>= keeps species whose total is exactly 10; drop = FALSE keeps the result
# a table even if only one species remains)
community[, sp_abundance >= 10, drop = FALSE]

# calculate species occurrence (number of sites where each species is present)
sp_occurence <- apply(community_pa, 2, sum)
sp_occurence

# eliminate species that occur less than 5 times
# (>= keeps species that occur exactly 5 times)
community[, sp_occurence >= 5, drop = FALSE]

# Build a factor that assigns each of the 18 sites to a category
site_category <- c(
  "Y08", "Y08", "Y08", "Y08", "Y08", "Y08",
  "Y09", "Y09", "Y09", "Y09", "Y09", "Y09",
  "Y07", "Y07", "Y07", "Y07", "Y07", "Y07"
)
# At this point R treats the values as plain text, not as a category
summary(site_category)
is.factor(site_category)
# turn the character vector into a factor
site_category <- factor(site_category)
# The summary now reports the number of observations in each category
summary(site_category)
is.factor(site_category)
# levels() lists the distinct categories of a factor
levels(site_category)

# subset() keeps the sites (rows) that satisfy a condition on species abundances
# sites where species 1 occurred
community_sp1_present <- subset(community, subset = Species1 > 0)
# sites where species 1 or species 2 occurred: with non-negative abundances
# the combined total is positive when at least one of them is present
community_sp1or2_present <- subset(
  community,
  subset = (Species1 + Species2) > 0
)