#R语言 | 计算微生物群落alpha多样性及其可视化环境驱动因子
library(ggplot2)
library(vegan)
library(randomForest)
library(reshape2)
library(psych)
library(dplyr)
library(rfPermute)
##链接:https://pan.baidu.com/s/1NnBquWKGp1cMH2HZXGw-jA 提取码:hayq
#数据按照对应的OTU表,物种文件和分组文件
otu<-read.csv("D://test/test_otu.csv",row.names = 1)
design<-read.csv("D://test/test_design.csv")
#1.计算群落多样性,这里随机构建细菌、真菌、古菌和原生生物数据集
otu_bac=otu[1:379,]
otu_fun=otu[600:1009,]
otu_arc=otu[201:679,]
otu_pro=otu[700:999,]
#计算shannon指数
library(vegan)
index_bac<-diversity(t(otu_bac), index= "shannon", MARGIN = 1)
index_fun<-diversity(t(otu_fun), index= "shannon", MARGIN = 1)
index_arc<-diversity(t(otu_arc), index= "shannon", MARGIN = 1)
index_pro<-diversity(t(otu_pro), index= "shannon", MARGIN = 1)
df_index=data.frame(index_bac,index_fun,index_arc,index_pro)
#### 合并数据框
df_plot<-df_index%>% mutate(spearman_df=row.names(.))%>%melt()
#图1
p1 = ggplot(df_plot,mapping = aes(x=variable,y=value,colour=variable))+
stat_boxplot(geom="errorbar",position=position_dodge(width=0.2),width=0.3)+
geom_boxplot(notch = F,size=0.7)+
geom_jitter(position=position_jitter(0.2), size=2.5)+
scale_color_manual(values=c("#DD5F60","#9AC0CD","#8FBC8F","#3CB371"))+theme_bw()+
labs(x="", y="Shannon index")+theme_bw()+
theme(axis.text=element_text(colour='black',size=9))+
theme(strip.background = element_rect(fill=c("gray99")))+
theme(strip.text = element_text(size = 12,face = 'bold',colour = "gray2"))+
theme(panel.grid.major=element_blank(),panel.grid.minor=element_blank())
#计算Spearman相关分析系数
df<-cbind(design[,4:12],df_index)
spearman_df <- corr.test(df, method = 'spearman', adjust = 'none')
spearman_df<- data.frame(spearman_df$r)
spearman_df<-spearman_df[1:9,10:13]
####采用randomForest包计算环境因子的贡献重要性
#设置随机种子,使结果能够重现,提取预测因子的解释率
set.seed(1234)
rf_results<-rfPermute(index_bac~., data =df[,c(1:10)], importance = TRUE, ntree = 1000, num.cores = 15)
predictor_1<- data.frame(importance(rf_results, scale = TRUE), check.names = FALSE)
set.seed(1234)
rf_results<-rfPermute(index_fun~., data =df[,c(1:9,11)], importance = TRUE, ntree = 1000, num.cores = 15)
predictor_2<- data.frame(importance(rf_results, scale = TRUE), check.names = FALSE)
set.seed(1234)
rf_results<-rfPermute(index_arc~., data =df[,c(1:9,12)], importance = TRUE, ntree = 1000, num.cores = 15)
predictor_3<- data.frame(importance(rf_results, scale = TRUE), check.names = FALSE)
set.seed(1234)
rf_results<-rfPermute(index_pro~., data =df[,c(1:9,13)], importance = TRUE, ntree = 1000, num.cores = 15)
predictor_4<- data.frame(importance(rf_results, scale = TRUE), check.names = FALSE)
#合并随机森林结果
randomForest_df<-data.frame(predictor_1[,1],predictor_2[,1],predictor_3[,1],predictor_4[,1])
rownames(randomForest_df)<-rownames(predictor_1)
colnames(randomForest_df)<-colnames(spearman_df)
#转化为长数据
randomForest_io<-randomForest_df%>% mutate(randomForest_df=row.names(.))%>%melt()
spearman_io<-spearman_df%>% mutate(spearman_df=row.names(.))%>%melt()
#图b
p2 = ggplot() +geom_tile(data = spearman_io,aes(x =spearman_df, y = variable, fill = value)) +
scale_fill_gradientn(colors = c('#3CB371', 'white', '#DD5F60'), limit = c(-1, 1)) +
geom_point(data = randomForest_io, aes(x = randomForest_df, y = variable, size = value), shape = 1) +
scale_size_continuous(range = c(0,12)) +
labs(size = 'MSE (%)')+
theme_minimal()+
scale_x_discrete(expand = c(0, 0)) +
scale_y_discrete(expand = c(0, 0)) +
theme(axis.text=element_text(colour='black',size=9))+
labs(y = '', x = '', fill = 'Spearman R')
#拼图
library(cowplot)
cowplot::plot_grid(p1,p2, nrow= 1, rel_widths = c(1, 2), labels=LETTERS[1:2])