写在前面
今天给大家分享内容是,利用 ggClusterNet 新更新的内容:样本对比相关互连, 在火山图上下调的基础上,增加了两组样本的相关性互连展示,更新了火山图的展示方式,让数据展示更加直观清晰。
在分析之前,如果你没使用过r语言,可以如此安装R包
install.packages("BiocManager")
library(BiocManager)
install("phyloseq")
install("tidyverse")
### 更新R包ggClusterNet
remotes::install_github("taowenmicro/ggClusterNet")
library(tidyverse)
library(ggClusterNet)
library(phyloseq)
路径准备
res1path = "./upper/"
diffpath.2 = paste(res1path,"/micro.compounds.9.axis/",sep = "")
fs::dir_create(diffpath.2)
## ps 是 ggClusterNet包自带内置数据
准备参数
group = "Group"
pvalue = 0.05
lfc =0
artGroup = NULL
method = "TMM"
j = "OTU" ### 选择数据运行的分类
path = diffpath.2
正式分析-先用edger方法计算差异,基础火山图展示
##判断分类水平
if (j %in% c("OTU","gene","meta")) {
ps = ps
} else if (j %in% c(1:7)) {
ps = ps %>%
ggClusterNet::tax_glom_wt(ranks = j)
} else if (j %in% c("Kingdom","Phylum","Class","Order","Family","Genus","Species")){
ps = ps %>%
ggClusterNet::tax_glom_wt(ranks = j)
} else {
ps = ps
print("unknown j, checked please")
}
sub_design <- as.data.frame(phyloseq::sample_data(ps))
colnames(sub_design) = "Group"
Desep_group <- as.character(levels(as.factor(sub_design$Group)))
Desep_group
if ( is.null(artGroup)) {
#--构造两两组合#-----
aaa = combn(Desep_group,2)
# sub_design <- as.data.frame(sample_data(ps))
}
if (!is.null(artGroup)) {
aaa = as.matrix(b )
}
otu_table = as.data.frame(ggClusterNet::vegan_otu(ps))
count = as.matrix(otu_table)
count <- t(count)
sub_design <- as.data.frame(phyloseq::sample_data(ps))
dim(sub_design)
sub_design$SampleType = as.character(sub_design$Group)
sub_design$SampleType <- as.factor(sub_design$Group)
# create DGE list
d = edgeR::DGEList(counts=count, group=sub_design$SampleType)
d$samples
d = edgeR::calcNormFactors(d,method=method)#默认为TMM标准化
# Building experiment matrix
design.mat = model.matrix(~ 0 + d$samples$group)
colnames(design.mat)=levels(sub_design$SampleType)
d2 = edgeR::estimateGLMCommonDisp(d, design.mat)
d2 = edgeR::estimateGLMTagwiseDisp(d2, design.mat)
fit = edgeR::glmFit(d2, design.mat)
#------------根据分组提取需要的差异结果#------------
for (i in 1:dim(aaa)[2]) {
# i = 1
Desep_group = aaa[,i]
print( Desep_group)
# head(design)
# 设置比较组写在前面的分组为enrich表明第一个分组含量高
group = paste(Desep_group[1],Desep_group[2],sep = "-")
group
BvsA <- limma::makeContrasts(contrasts = group,levels=design.mat)#注意是以GF1为对照做的比较
# 组间比较,统计Fold change, Pvalue
lrt = edgeR::glmLRT(fit,contrast=BvsA)
# FDR检验,控制假阳性率小于5%
de_lrt = edgeR::decideTestsDGE(lrt, adjust.method="fdr", p.value=pvalue,lfc=lfc)#lfc=0这个是默认值
summary(de_lrt)
# 导出计算结果
x=lrt$table
x$sig=de_lrt
head(x)
#------差异结果符合otu表格的顺序
row.names(count)[1:6]
x <- cbind(x, padj = p.adjust(x$PValue, method = "fdr"))
enriched = row.names(subset(x,sig==1))
depleted = row.names(subset(x,sig==-1))
x$level = as.factor(ifelse(as.vector(x$sig) ==1, "enriched",ifelse(as.vector(x$sig)==-1, "depleted","nosig")))
x = data.frame(row.names = row.names(x),logFC = x$logFC,level = x$level,p = x$PValue)
head(x)
#------差异结果符合otu表格的顺序
x1 = x %>%
dplyr::filter(level %in% c("enriched","depleted","nosig") )
head(x1)
x1$Genus = row.names(x1)
# x$level = factor(x$level,levels = c("enriched","depleted","nosig"))
if (nrow(x1)<= 1) {
}
x2 <- x1 %>%
dplyr::mutate(ord = logFC^2) %>%
dplyr::filter(level != "nosig") %>%
dplyr::arrange(desc(ord)) %>%
head(n = 5)
x3 <- x1 %>%
dplyr::mutate(ord = logFC^2) %>%
dplyr::filter(level != "nosig") %>%
dplyr::arrange(desc(ord))
file = paste(path,"/",group,j,"_","Edger_Volcano_difference.csv",sep = "")
write.csv(x3,file,quote = F)
head(x2)
p <- ggplot(x1,aes(x =logFC ,y = -log2(p), colour=level)) +
geom_point() +
geom_hline(yintercept=-log10(0.2),
linetype=4,
color = 'black',
size = 0.5) +
geom_vline(xintercept=c(-1,1),
linetype=3,
color = 'black',
size = 0.5) +
ggrepel::geom_text_repel(data=x2, aes(x =logFC ,y = -log2(p), label=Genus), size=1) +
scale_color_manual(values = c('blue2','red2', 'gray30')) +
ggtitle(group) + theme_bw()
p
file = paste(path,"/",group,j,"_","Edger_Volcano.pdf",sep = "")
ggsave(file,p,width = 8,height = 6)
file = paste(path,"/",group,j,"_","Edger_Volcano.png",sep = "")
ggsave(file,p,width = 8,height = 6)
colnames(x) = paste(group,colnames(x),sep = "")
if (i ==1) {
table =x
}
if (i != 1) {
table = cbind(table,x)
}
}
x = table
###########添加物种丰度#----------
# dim(count)
# str(count)
count = as.matrix(count)
norm = t(t(count)/colSums(count)) #* 100 # normalization to total 100
dim(norm)
norm1 = norm %>%
t() %>% as.data.frame()
# head(norm1)
#数据分组计算平均值
library("tidyverse")
head(norm1)
iris.split <- split(norm1,as.factor(sub_design$SampleType))
iris.apply <- lapply(iris.split,function(x)colMeans(x))
# 组合结果
iris.combine <- do.call(rbind,iris.apply)
norm2= t(iris.combine)
#head(norm)
str(norm2)
norm2 = as.data.frame(norm2)
# dim(x)
head(norm2)
x = cbind(x,norm2)
head(x)
#在加入这个文件taxonomy时,去除后面两列不相干的列
# 读取taxonomy,并添加各列名称
if (!is.null(ps@tax_table)) {
taxonomy = as.data.frame(ggClusterNet::vegan_tax(ps))
head(taxonomy)
# taxonomy <- as.data.frame(tax_table(ps1))
#发现这个注释文件并不适用于直接作图。
#采用excel将其分列处理,并且删去最后一列,才可以运行
if (length(colnames(taxonomy)) == 6) {
colnames(taxonomy) = c("kingdom","phylum","class","order","family","genus")
}else if (length(colnames(taxonomy)) == 7) {
colnames(taxonomy) = c("kingdom","phylum","class","order","family","genus","species")
}else if (length(colnames(taxonomy)) == 8) {
colnames(taxonomy) = c("kingdom","phylum","class","order","family","genus","species","rep")
}
# colnames(taxonomy) = c("kingdom","phylum","class","order","family","genus")
# Taxonomy排序,并筛选OTU表中存在的
library(dplyr)
taxonomy$id=rownames(taxonomy)
# head(taxonomy)
tax = taxonomy[row.names(x),]
x = x[rownames(tax), ] # reorder according to tax
if (length(colnames(taxonomy)) == 7) {
x = x[rownames(tax), ] # reorder according to tax
x$phylum = gsub("","",tax$phylum,perl=TRUE)
x$class = gsub("","",tax$class,perl=TRUE)
x$order = gsub("","",tax$order,perl=TRUE)
x$family = gsub("","",tax$family,perl=TRUE)
x$genus = gsub("","",tax$genus,perl=TRUE)
# x$species = gsub("","",tax$species,perl=TRUE)
}else if (length(colnames(taxonomy)) == 8) {
x$phylum = gsub("","",tax$phylum,perl=TRUE)
x$class = gsub("","",tax$class,perl=TRUE)
x$order = gsub("","",tax$order,perl=TRUE)
x$family = gsub("","",tax$family,perl=TRUE)
x$genus = gsub("","",tax$genus,perl=TRUE)
x$species = gsub("","",tax$species,perl=TRUE)
}else if (length(colnames(taxonomy)) == 9) {
x = x[rownames(tax), ] # reorder according to tax
x$phylum = gsub("","",tax$phylum,perl=TRUE)
x$class = gsub("","",tax$class,perl=TRUE)
x$order = gsub("","",tax$order,perl=TRUE)
x$family = gsub("","",tax$family,perl=TRUE)
x$genus = gsub("","",tax$genus,perl=TRUE)
x$species = gsub("","",tax$species,perl=TRUE)
}
} else {
x = cbind(x,tax)
}
基于火山图基础上进行相关性连接展示
res = x
head(res)
# 计计算OTU之间相关
n1 = cor_Big_micro(ps%>% filter_OTU_ps(500),
r.threshold = 0.85
)
cor = n1[[1]]
#-----计算边#--------
map = sampleData(ps)
ids = map$Group %>%
unique() %>%
combn(2)
# 两两组合的组做差异,展示火山图也是展示两两组合
for (i in 1:dim(aaa)[2]) {
# i = 1
Desep_group = aaa[,i]
print( Desep_group)
# head(design)
# 设置比较组写在前面的分组为enrich表明第一个分组含量高
group = paste(Desep_group[1],Desep_group[2],sep = "-")
group
node = data.frame(elements =row.names(res),X1 = res[,paste0(ids[1,i],"-",ids[2,i],"logFC")],
X2 =res[,paste0(ids[1,i],"-",ids[2,i],"p")])
node$X2 = -log2(node$X2)
node2 = data.frame(elements =row.names(res),
X1 = res[,paste0(ids[1,i],"-",ids[2,i],"logFC")],
X2 =res[,paste0(ids[1,i],"-",ids[2,i],"p")],
class = res[,paste0(ids[1,i],"-",ids[2,i],"level")]
)
node2$X2 = -log2(node2$X2)
edge = edgeBuild(cor = cor,node = node)
head(edge)
library(ggnewscale)
col1 = c("red","blue")
names(col1) = c("+","-")
cols = c('blue2','red2', 'gray30')
names(cols) = c("depleted","enriched","nosig")
p <- ggplot() +
geom_point(data = node2,aes(x =X1 ,y = X2, colour=class)) +
scale_color_manual(values = cols) +
ggnewscale::new_scale_color() +
geom_segment(aes(x = X1, y = Y1, xend = X2, yend = Y2,color = as.factor(cor)),
data = edge, size = 0.5) +
geom_hline(yintercept=2.5,
linetype=4,
color = 'black',
size = 0.5) +
geom_vline(xintercept=c(-1,1),
linetype=3,
color = 'black',
size = 0.5) +
# ggrepel::geom_text_repel( aes(x =logFC ,y = -log2(p), label=Genus), size=1) +
scale_color_manual(values = col1)+ggtitle(group)+
theme_bw()
p
### 保存进阶火山图
file = paste(path,"/",group,j,"_","Edger_Volcano_1.pdf",sep = "")
ggsave(file,p,width = 8,height = 6)
file = paste(path,"/",group,j,"_","Edger_Volcano_1.png",sep = "")
ggsave(file,p,width = 8,height = 6)
}
根际互作生物学研究室 简介
根际互作生物学研究室是沈其荣院士土壤微生物与有机肥团队下的一个关注于根际互作的研究小组。本小组由袁军教授带领,主要关注:1.植物和微生物互作在抗病过程中的作用;2 环境微生物大数据整合研究;3 环境代谢组及其与微生物过程研究体系开发和应用。团队在过去三年中在 Nature Communications,ISME J,Microbiome,SCLS,New Phytologist,iMeta,Fundamental Research, PCE,SBB,JAFC(封面),Horticulture Research,SEL(封面),BMC plant biology等期刊上发表了多篇文章。欢迎关注 微生信生物 公众号对本研究小组进行了解。
撰写:文涛 牛国庆
修改:文涛
排版:杨雯儀
审核:袁军 团队工作及其成果 (点击查看)
了解 交流 合作
小组负责人邮箱 袁军:junyuan@njau.edu.cn;
小组成员文涛:taowen@njau.edu.cn等
团队公众号:微生信生物 添加主编微信,或者后台留言。
1.仅限相关专业或研究方向人员添加,必须实名,不实名则默认忽略。
2.非相关专业的其他人员及推广宣传人员禁止添加。
3.添加主编微信需和简单聊一聊专业相关问题,等待主编判断后,可拉群。
4 微生信生物VIP微信群不受限制,给微生信生物供稿一次即可加入(群里发送推文代码+高效协助解决推文运行等问题+日常问题咨询回复)。
加主编微信 加入群聊
目前营销人员过多,为了维护微生信生物3年来维护的超5500人群聊,目前更新进群要求: