######################################
# Load libraries
######################################
library(vegan)
library(BiodiversityR)

# Site by species matrix of sample data.
# Data is stored in the variable "community"; the "site" column of the CSV
# becomes the row names.
community <- read.csv("sample.csv", header = TRUE, row.names = "site")

######################################
# View the matrix and some properties
######################################

# to view something, just enter the name
community

# dimensions (rows x columns)
dim(community)

# total sum
sum(community)

# column and row names
colnames(community)
rownames(community)

# Arithmetic operators
# add 1 to all values
community + 1

# square all values
community^2

# Logical operators
# which occurrences of Species5 are greater than 20
community$Species5 > 20

# Selecting portions of a matrix
# matrix[row, column]
# a single element
community[4, 6]

# a whole row (blank after comma selects all)
community[4, ]

# a whole column
community[, 6]

# a range of rows and columns
# select rows 3-4 and columns 4-8
community[3:4, 4:8]

# select rows or columns by name
community[, "Species3"]
community["B_08", ]

# transpose a matrix
t(community)

######################################
# Transformations
######################################

# log+1 transform community data
# here, we are saving the transformed matrix as community_log
community_log <- log(community + 1)

# The BiodiversityR package has a function disttransform with a number of
# other transformation methods - "hellinger", "chord", "profiles",
# "chi.square", "log", "square" or "pa"
# Profile is percent occurrence
# here, we will use decostand from the vegan package

# standardize by row or column total (MARGIN = 1 or 2)
community_total <- decostand(community, method = "total", MARGIN = 1)

# convert into 0/1 (presence/absence) matrix
community_pa <- decostand(community, method = "pa")

######################################
# Sorting and ordering
######################################

community$Species5

# species 5 sorted
sort(community$Species5)

# decreasing order
sort(community$Species5, decreasing = TRUE)

# species 5 ordered (row positions rather than values)
order(community$Species5)

# decreasing order
order(community$Species5, decreasing = TRUE)

# sort the community matrix by Species5 abundance
community[order(community$Species5), ]

# use apply function to get row and column means
colmeans <- apply(community, 2, mean)
rowmeans <- apply(community, 1, mean)

# view original and transformed data
community
community_log
community_total
community_pa

# Use apply and community_pa to quantify
# species by site or number of occurrences by a species
# number of species by site
apply(community_pa, 1, sum)

# number of occurrences by species
apply(community_pa, 2, sum)

# use mean instead of sum to get proportional measures
apply(community_pa, 1, mean)
apply(community_pa, 2, mean)

######################################
# Some ways to deal with rare species
######################################
# Put some of these pieces together to drop rare species or low abundance
# sites from a matrix

# drop the three sites with the lowest abundance
row_abundance <- apply(community, 1, sum)

# abundance of the third-lowest site (NA when there are fewer than 3 sites)
cutoff <- sort(row_abundance, decreasing = FALSE)[3]

# Rank the sites with order() and remove exactly the three lowest. Comparing
# against `cutoff` would drop more than three sites whenever abundances tie
# at the cutoff, and would produce NA rows when `cutoff` is NA.
n_drop <- min(3, length(row_abundance))
lowest_sites <- order(row_abundance)[seq_len(n_drop)]
keep_sites <- setdiff(seq_along(row_abundance), lowest_sites)
community_dropsites <- community[keep_sites, , drop = FALSE]

# calculate total abundance of each species
sp_abundance <- apply(community, 2, sum)
sp_abundance

# eliminate species with a total abundance <10
# (keep >= 10, so a species with exactly 10 individuals is retained;
# drop = FALSE keeps a data frame even if only one species remains)
community[, sp_abundance >= 10, drop = FALSE]

# calculate species occurrence
sp_occurence <- apply(community_pa, 2, sum)
sp_occurence

# eliminate species that occur less than 5 times
# (keep >= 5, so a species occurring exactly 5 times is retained)
community[, sp_occurence >= 5, drop = FALSE]

######################################
# Factors
######################################

# Create a factor: six sites each for Y08, Y09 and Y07, in that order
site_category <- rep(c("Y08", "Y09", "Y07"), each = 6)

# Right now, R is treating this as text, not a category
summary(site_category)
is.factor(site_category)

# convert it to a factor
site_category <- factor(site_category)

# Now the summary information is different
# (number of observations per category)
summary(site_category)
is.factor(site_category)

# Levels function tells you what levels are in a factor
levels(site_category)

######################################
# Subsetting
######################################

# Use subset function to select sites based on species abundances
# select sites where species 1 occurred
community_sp1_present <- subset(community, Species1 > 0)

# select sites with either species 1 or 2
# (a positive sum means at least one is present, given non-negative counts)
community_sp1or2_present <- subset(community, Species1 + Species2 > 0)