本节内容属于思农数据工程产物,欢迎大家批评指正。
差异代谢物完整流程分析-t-test,整理出常用所有阈值结果,另外对于可视化结果进行进一步美化和调整。
rm(list = ls())#清空环境
#加载工作目录
setwd("C:/代谢组/data")
#确定结果输出路径
outpath <- "C:/代谢组/result"
diffpath.2 = paste(outpath,"/2_差异代谢物分析/",sep = "")
dir.create(diffpath.2)
#载入相关包
library(dplyr)
library(phyloseq)
library(ggplot2)
#加载ps文件,包含代谢物相对丰度表,样本信息,代谢物信息
ps <- readRDS("analysis1_level24_ra_metabolism.rds")
ps= ps %>%filter_taxa(function(x) sum(x ) > 0 , TRUE)#过滤在任一样本中都不存在的微生物
data <- t(data.frame((otu_table(ps))))#提取代谢物相对丰度数据
group <- sample_data(ps)#提取组别信息
data <- data[group$ID,]#保证代谢物数据和丰度数据样本顺序一致
data <- cbind(group[,1:2],data)#添加ID和组别列
data$Group <- factor(data$Group)#将组别列转换为因子,levels参数可指定因子顺序,labels参数可指定分组标签
data.raw <- data#原始数据保存环境中
#提取各组平均丰度
tax <- data.frame(tax_table(ps))
tax <- tax[,c("Mean1","Mean2","Mean3")]
tax1 <-data.frame(matrix(as.numeric((unlist(tax))),ncol=ncol(tax),nrow=nrow(tax)))#将数据调整为数值变量,便于计算
rownames(tax1) <- rownames(tax)#行名
colnames(tax1) <- colnames(tax)#列名
#---Group1-Group0----
#筛选对应分组变量
group1 <- "Group1-Group0"
log2FC <-log2((tax1$Mean1) /(tax1$Mean2))
data <- data.raw[which(!data.raw$Group=="Group2"),]
#差异检验
result=data.frame(ID=colnames(data)[3:ncol(data)],
p=sapply(data[,3:ncol(data)], function(x){t.test(x~data$Group)[["p.value"]]}),
logFC=log2FC)
#P值进行BH校正
result <- cbind(result,p_adj=p.adjust(result$p, method = "BH"))
#到这与上次更新保持一致
#新增内容,多阈值结果统计
#根据不同阈值筛选差异微生物
result$level_padj_fc1 = as.factor(ifelse(as.vector(result$logFC)>0&as.vector(result$p_adj)<0.05, "enriched",
ifelse(as.vector(result$logFC)<0&as.vector(result$p_adj)<0.05, "depleted","nosig")))
result$level_fc2 = as.factor(ifelse(as.vector(result$logFC)>1&as.vector(result$p)<0.05, "enriched",
ifelse(as.vector(result$logFC)<(-1)&as.vector(result$p)<0.05, "depleted","nosig")))
result$level_fc1.5=as.factor(ifelse(as.vector(result$logFC)>log2(1.5)&as.vector(result$p)<0.05, "enriched",
ifelse(as.vector(result$logFC)<(-log2(1.5))&as.vector(result$p)<0.05, "depleted","nosig")))
result$level_fc1.2=as.factor(ifelse(as.vector(result$logFC)>log2(1.2)&as.vector(result$p)<0.05, "enriched",
ifelse(as.vector(result$logFC)<(-log2(1.2))&as.vector(result$p)<0.05, "depleted","nosig")))
result$level_fc1=as.factor(ifelse(as.vector(result$logFC)>0&as.vector(result$p)<0.05, "enriched",
ifelse(as.vector(result$logFC)<0&as.vector(result$p)<0.05, "depleted","nosig")))
result1<-result
result1$Genus = row.names(result1)#定义一列方便画图和流程化
result1 <- result1[order(result$level_padj_fc1,result1$level_fc2,result1$level_fc1.5,result1$level_fc1.2,result1$level_fc1),]#按顺序排列
#导出检验结果
result1 <- cbind(tax1[rownames(result1),c("Mean1","Mean2")],result1)
file = paste(diffpath.2,"/",group1,"t.test.csv",sep = "")
write.csv(result1,file,quote = TRUE)
#整理作图结果,以p<0.05,差异倍数选定为1作为阈值进行示例展示,具体阈值确定根据实际情况确定
result2 <- result1 %>%
dplyr::mutate(ord = logFC^2) %>%
dplyr::filter(level_fc1!= "nosig") %>%
dplyr::arrange(desc(ord)) %>%
head(n = 5)#添加变化率前五标签
file = paste(diffpath.2,"/",group1,"_plotlabel.csv",sep = "")
write.csv(result2,file,quote = T)
result3 <- result1 %>%
dplyr::mutate(ord = logFC^2) %>%
dplyr::filter(level_fc1!= "nosig") %>%
dplyr::arrange(desc(ord))#总结所有差异物种
file = paste(diffpath.2,"/",group1,"_all_difference_metabolism.csv",sep = "")
write.csv(result3,file,quote = TRUE)
#可视化,指定图例标签,包含level和其具体数量
ld <- paste("depleted",nrow(result1[result1$level_fc1=="depleted",]),sep = " ")
lr <- paste("enriched",nrow(result1[result1$level_fc1=="enriched",]),sep = " ")
ln <- paste("nosig",nrow(result1[result1$level_fc1=="nosig",]),sep = " ")
p <- ggplot(result1,aes(x =logFC ,y = -log10(p), colour=level_fc1)) +
geom_point(alpha=0.83,size=1.1) +
geom_point(data=result2,aes(x =logFC ,y = -log10(p), fill=level_fc1),shape=21,size=2,colour="black")+
scale_fill_manual(values = c("enriched"="#ffad73","depleted"="#26b3ff","nosig"="grey"))+
geom_hline(yintercept=-log10(0.05),linetype="dashed",color = 'black',size = 0.5) +
geom_vline(xintercept=c(-1,1),linetype="dashed",color = 'black',size = 0.5) +
ggrepel::geom_text_repel(data=result2, aes(x =logFC ,y = -log10(p), label=Genus), show.legend=FALSE,force=2,size=3) +
scale_color_manual(values=c("enriched"="#ffad73","depleted"="#26b3ff","nosig"="grey"),
labels=c("enriched"=lr,"depleted"=ld,"nosig"=ln)) +
ggtitle(group1) + theme(panel.border=element_rect(colour= "black",fill=NA,size=0.75),
panel.grid.minor=element_blank(),
panel.background=element_blank(),
plot.background=element_blank(),
axis.title=element_text(face="bold",color="black",size=11),
plot.title =element_text(face="bold",color="black",size=12),
axis.text=element_text(color="black",size=10),
legend.background=element_blank(),
legend.title=element_blank(),
legend.text=element_text(face="bold",color="black",size=10),
legend.spacing.x=unit(0,"cm"),
legend.key = element_blank(),
legend.key.size =unit(0.6,"cm"),
legend.position=c(0.99,0.99),
legend.justification = c(0.95,0.84))+
guides(fill="none",color = guide_legend(override.aes = list(size=2)))#去除填充图例
p
#根据实际效果调整图例位置避免重叠可以调整legend.position=c(0.99,0.99),legend.justification = c(0.95,0.84)
#legend.position表示图例在图中位置,参数分别代表其在坐标轴占比,前者x轴,后者y轴,c(1,1)表示图中右上角
#egend.justification表示图例与所给位置对齐方式,前置代表x轴,后者代表y轴,c(0.5,0.5)表示居中对齐,即图例中心坐标为指定位置,0分别代表左对齐和下对齐,1分别代表右对齐和上对齐
#保存结果
file = paste(diffpath.2,"/",group1,"_","Edger_Volcano.pdf",sep = "")
ggsave(file,p,width = 12/2.54,height = 10.5/2.54)
file = paste(diffpath.2,"/",group1,"_","Edger_Volcano.png",sep = "")
ggsave(file,p,width = 12/2.54,height = 10.5/2.54)
#出图效果如下图所示,该图片为随机数据出图,无实际意义
作者:思农生信团队成员:汪志祥