##################################### #Load libraries ##################################### library(vegan) # The file contains code for a function to test the significance of loadings # Written by the authors of that paper, make sure the file is in your working directory source("sigpca2.R") ##################################### #Load Data ##################################### community<-read.csv("example.csv", row.names='Sites', sep=",", header=TRUE) community2<-read.csv("example_linear.csv", row.names='Sites', sep=",", header=TRUE) ##################################### # # Basic PCA is a eigen decomposition of a variance/covariance or correlation matrix # ##################################### # calculate variance/covariance of the data com_varcovar<-var(community) pca_obj<-eigen(com_varcovar) pca_obj # sum of the eigenvalues represents total variation sum(pca_obj$values) # percent of variance on each axis # each eighenvalue divided by the total pca_obj$values/sum(pca_obj$values) # calculate sample scores from the eigenvectors and the original data # center the matrix: subtract each column by the column mean # now, each column has a mean of 0, values are deviations from column means centered_com<-as.matrix(community-apply(community,2,mean)) # multiply the centered matrix by the eigenvalues pca_obj_scores<-centered_com %*% pca_obj$vectors # Plot the first two axes plot(pca_obj_scores[,1:2],pch=16,xlab="Axis 1 (45.6%)",ylab="Axis 2 (32.3%)") # plot species loadings in the same space text(pca_obj$vectors[,1:2],pch=16,labels=names(community),col="red") # Most of the time you want to perform a scaled PCA # function scale will scale all columns to unit variance # In this eacmple, this won't change too much since our sample dataset is artifically uniform community_scaled<-scale(community) # Now we have one standard deviation of variation per column apply(community_scaled,2,sd) # perform same as above com_varcovar<-var(community_scaled) pca_obj<-eigen(com_varcovar) pca_obj # this now sums to 10...representing our 10 variables each with one unit of vatiation sum(pca_obj$values) # the rest will be similar pca_obj$values/sum(pca_obj$values) centered_com<-as.matrix(community-apply(community,2,mean)) pca_obj_scores<-centered_com %*% pca_obj$vectors # Plot the first two axes plot(pca_obj_scores[,1:2],pch=16,xlab="Axis 1 (45.6%)",ylab="Axis 2 (32.3%)") # plot species loadings in the same space text(pca_obj$vectors[,1:2],pch=16,labels=names(community),col="red") # Use the Pern-Neto et al. (2003) approach to calculate "p-values" for each loading # Note that these are not real p-values, use with caution! # This is quite computer intensive, may take a few minutes (drop permutations on slower machines) sigpca2(community_scaled, permutations=1000) ##################################### # Principle Components Analysis # function prcomp (stats package which is automaticall loaded) ##################################### # Results identical to what we have above... prin_comp<-prcomp(community, scale=FALSE) summary(prin_comp) ordiplot(prin_comp, type="text") # total variation in the dataset, identical to what we have above sum(prin_comp$sdev^2) # Most of the time you will want to standardize your data # With this function, scale=TRUE does this prin_comp<-prcomp(community, scale=TRUE) summary(prin_comp) # In this approach the total amount of variance standardized # One unit of variability per variable... 10 species = 10 sd sum(prin_comp$sdev^2) # calculate % variance accounted for on firs axis prin_comp$sdev[1]^2/sum(prin_comp$sdev^2) # Principle coordinates (PCoA) from last week with a Euclidean distance matrix, should be the same distance<-vegdist(community, method="euclidean") prin_coord<-cmdscale(distance, k=10, eig=TRUE) ordiplot(prin_coord, type="text") # Note the similarity in the plots, only difference is they are rotated # and there are no species plotted here. # Eigenvales for the principal coordinates prin_coord$eig # Calcualte % variance accounted for on axis 1 and 2 of the PCO # First eigenvalue divided by the sum of all 10 prin_coord$eig[1:2]/sum(prin_coord$eig) # same value as for both PCA approaches above ##################################### # Principle Components Analysis # function rda (vegan package) ##################################### prin_comp<-rda(community) summary(prin_comp) # generlly the same results as above # the default is unscaled # variance in output described as "inertia" # some of the loadings are scaled differently ordiplot(prin_comp, type="text") # broken stick model screeplot(prin_comp,bstick=TRUE) # Sum of the eigenvalues = inertia sum(prin_comp$CA$eig) # calculate % variance accounted for on firs axis prin_comp$CA$eig[1]/sum(prin_comp$CA$eig) ##################################################### # PCA of the same data with a single outlier added ##################################################### # arbitrarily set one value in the matrix to a larger value # here, set row 4 column 6 to 5 community[4,6]<-5 prin_comp<-rda(community) summary(prin_comp) ordiplot(prin_comp, type="text") # not nearly so bad if we do a scaled PCA # the reason is that variance is scaled to one unit per variable prin_comp<-rda(community, scale=T) summary(prin_comp) ordiplot(prin_comp, type="text") # what does that single outlier do to our PCoA? pcoa<-cmdscale(vegdist(community)) ordiplot(pcoa, type="text") ###################################################### # Factor analysis ###################################################### # Try a factor analysis with no rotation fac<-factanal(community,6,scores="regression",rotation="none") # Note the total % variance accounted for fac # plot the factor analysis, adding species from the loadings plot(fac$scores[,1],fac$scores[,2]) text(fac$loadings[,1],fac$loadings[,2],colnames(community),col="red") # Compare that to a standard PCA pca<-rda(community,scale=TRUE) summary(pca) plot(pca) # Try a factor analysis with varimax rotation fac<-factanal(community,6,scores="regression",rotation="varimax") fac # plot the factor analysis, adding species from the loadings plot(fac$scores[,1],fac$scores[,2]) text(fac$loadings[,1],fac$loadings[,2],colnames(community),col="red") plot(fac$scores[,2],fac$scores[,3]) text(fac$loadings[,2],fac$loadings[,3],colnames(community),col="red")