手写非靶代谢组t检验过程并优化可视化图形布局(数据工程)

学术   2024-09-29 11:29   江苏  

本节内容属于思农数据工程产物,欢迎大家批评指正。

差异代谢物完整流程分析-t-test,整理出常用所有阈值结果,另外对于可视化结果进行进一步美化和调整。

rm(list = ls())#清空环境#加载工作目录setwd("C:/代谢组/data")#确定结果输出路径outpath <- "C:/代谢组/result"diffpath.2 = paste(outpath,"/2_差异代谢物分析/",sep = "")dir.create(diffpath.2)#载入相关包library(dplyr)library(phyloseq)library(ggplot2)#加载ps文件,包含代谢物相对丰度表,样本信息,代谢物信息ps <- readRDS("analysis1_level24_ra_metabolism.rds")ps= ps %>%filter_taxa(function(x) sum(x ) > 0 , TRUE)#过滤在任一样本中都不存在的微生物data <- t(data.frame((otu_table(ps))))#提取代谢物相对丰度数据group <- sample_data(ps)#提取组别信息data <- data[group$ID,]#保证代谢物数据和丰度数据样本顺序一致data <- cbind(group[,1:2],data)#添加ID和组别列data$Group <- factor(data$Group)#将组别列转换为因子,levels参数可指定因子顺序,labels参数可指定分组标签data.raw <- data#原始数据保存环境中#提取各组平均丰度tax <- data.frame(tax_table(ps))tax <- tax[,c("Mean1","Mean2","Mean3")]tax1 <-data.frame(matrix(as.numeric((unlist(tax))),ncol=ncol(tax),nrow=nrow(tax)))#将数据调整为数值变量,便于计算rownames(tax1) <- rownames(tax)#行名colnames(tax1) <- colnames(tax)#列名
#---Group1-Group0----#筛选对应分组变量group1 <- "Group1-Group0"log2FC <-log2((tax1$Mean1) /(tax1$Mean2))data <- data.raw[which(!data.raw$Group=="Group2"),]#差异检验result=data.frame(ID=colnames(data)[3:ncol(data)], p=sapply(data[,3:ncol(data)], function(x){t.test(x~data$Group)[["p.value"]]}), logFC=log2FC)#P值进行BH校正 result <- cbind(result,p_adj=p.adjust(result$p, method = "BH"))#到这与上次更新保持一致
#新增内容,多阈值结果统计#根据不同阈值筛选差异微生物result$level_padj_fc1 = as.factor(ifelse(as.vector(result$logFC)>0&as.vector(result$p_adj)<0.05, "enriched", ifelse(as.vector(result$logFC)<0&as.vector(result$p_adj)<0.05, "depleted","nosig")))result$level_fc2 = as.factor(ifelse(as.vector(result$logFC)>1&as.vector(result$p)<0.05, "enriched", ifelse(as.vector(result$logFC)<(-1)&as.vector(result$p)<0.05, "depleted","nosig")))result$level_fc1.5=as.factor(ifelse(as.vector(result$logFC)>log2(1.5)&as.vector(result$p)<0.05, "enriched", ifelse(as.vector(result$logFC)<(-log2(1.5))&as.vector(result$p)<0.05, "depleted","nosig")))result$level_fc1.2=as.factor(ifelse(as.vector(result$logFC)>log2(1.2)&as.vector(result$p)<0.05, "enriched", ifelse(as.vector(result$logFC)<(-log2(1.2))&as.vector(result$p)<0.05, "depleted","nosig")))result$level_fc1=as.factor(ifelse(as.vector(result$logFC)>0&as.vector(result$p)<0.05, "enriched", ifelse(as.vector(result$logFC)<0&as.vector(result$p)<0.05, "depleted","nosig")))result1<-resultresult1$Genus = row.names(result1)#定义一列方便画图和流程化result1 <- result1[order(result$level_padj_fc1,result1$level_fc2,result1$level_fc1.5,result1$level_fc1.2,result1$level_fc1),]#按顺序排列#导出检验结果result1 <- cbind(tax1[rownames(result1),c("Mean1","Mean2")],result1)file = paste(diffpath.2,"/",group1,"t.test.csv",sep = "")write.csv(result1,file,quote = TRUE)#整理作图结果,以p<0.05,差异倍数选定为1作为阈值进行示例展示,具体阈值确定根据实际情况确定result2 <- result1 %>% dplyr::mutate(ord = logFC^2) %>% dplyr::filter(level_fc1!= "nosig") %>% dplyr::arrange(desc(ord)) %>% head(n = 5)#添加变化率前五标签file = paste(diffpath.2,"/",group1,"_plotlabel.csv",sep = "")write.csv(result2,file,quote = T)
result3 <- result1 %>% dplyr::mutate(ord = logFC^2) %>% dplyr::filter(level_fc1!= "nosig") %>% dplyr::arrange(desc(ord))#总结所有差异物种 file = paste(diffpath.2,"/",group1,"_all_difference_metabolism.csv",sep = "")write.csv(result3,file,quote = TRUE)
#可视化,指定图例标签,包含level和其具体数量ld <- paste("depleted",nrow(result1[result1$level_fc1=="depleted",]),sep = " ")lr <- paste("enriched",nrow(result1[result1$level_fc1=="enriched",]),sep = " ")ln <- paste("nosig",nrow(result1[result1$level_fc1=="nosig",]),sep = " ")
p <- ggplot(result1,aes(x =logFC ,y = -log10(p), colour=level_fc1)) + geom_point(alpha=0.83,size=1.1) + geom_point(data=result2,aes(x =logFC ,y = -log10(p), fill=level_fc1),shape=21,size=2,colour="black")+ scale_fill_manual(values = c("enriched"="#ffad73","depleted"="#26b3ff","nosig"="grey"))+ geom_hline(yintercept=-log10(0.05),linetype="dashed",color = 'black',size = 0.5) + geom_vline(xintercept=c(-1,1),linetype="dashed",color = 'black',size = 0.5) + ggrepel::geom_text_repel(data=result2, aes(x =logFC ,y = -log10(p), label=Genus), show.legend=FALSE,force=2,size=3) + scale_color_manual(values=c("enriched"="#ffad73","depleted"="#26b3ff","nosig"="grey"), labels=c("enriched"=lr,"depleted"=ld,"nosig"=ln)) + ggtitle(group1) + theme(panel.border=element_rect(colour= "black",fill=NA,size=0.75), panel.grid.minor=element_blank(), panel.background=element_blank(), plot.background=element_blank(), axis.title=element_text(face="bold",color="black",size=11), plot.title =element_text(face="bold",color="black",size=12), axis.text=element_text(color="black",size=10), legend.background=element_blank(), legend.title=element_blank(), legend.text=element_text(face="bold",color="black",size=10), legend.spacing.x=unit(0,"cm"), legend.key = element_blank(), legend.key.size =unit(0.6,"cm"), legend.position=c(0.99,0.99), legend.justification = c(0.95,0.84))+ guides(fill="none",color = guide_legend(override.aes = list(size=2)))#去除填充图例p#根据实际效果调整图例位置避免重叠可以调整legend.position=c(0.99,0.99),legend.justification = c(0.95,0.84)#legend.position表示图例在图中位置,参数分别代表其在坐标轴占比,前者x轴,后者y轴,c(1,1)表示图中右上角#egend.justification表示图例与所给位置对齐方式,前置代表x轴,后者代表y轴,c(0.5,0.5)表示居中对齐,即图例中心坐标为指定位置,0分别代表左对齐和下对齐,1分别代表右对齐和上对齐#保存结果file = paste(diffpath.2,"/",group1,"_","Edger_Volcano.pdf",sep = "")ggsave(file,p,width = 12/2.54,height = 10.5/2.54)
file = paste(diffpath.2,"/",group1,"_","Edger_Volcano.png",sep = "")ggsave(file,p,width = 12/2.54,height = 10.5/2.54)#出图效果如下图所示,该图片为随机数据出图,无实际意义

作者:思农生信团队成员:汪志祥

微生信生物
根际互作生物学研究室是沈其荣院士土壤微生物与有机肥团队下的一个关注于根际互作的研究小组。本小组由袁军教授带领,主要关注:1.植物和微生物互作在抗病过程中的作用 2 环境微生物大数据整合研究3 环境代谢组及其与微生物过程研究体系开发
 最新文章