本节内容属于思农数据工程产物,欢迎大家批评指正。
library(dplyr)
library(phyloseq)
library(ggClusterNet)
library(ggplot2)
library(pacman)
p_load(ropls)#有则library,无则install并library
library(ggsci)
library(reshape2)
rm(list = ls())
#实际使用优先加载工作目录 setwd(“ ”)
#确定文件和图片输出路径
outpath <- "C:/result"
oplspath = paste(outpath,"/OPLS-DA分析/",sep ="")
dir.create(oplspath)
#导入包自带数据
data(sacurine)
#提取代谢物组成和样品信息
dat<- sacurine$dataMatrix
group <- sacurine$sampleMetadata
#定义oplsda分析组别
group1 <- "Gender"
#oplsda分析,orthoI = NA表示执行oplsda,特征向量数量由模型效果确认,默认为0表示plsda分析;
#建立模型过程中,当报错显示第一预测主成分不显著时,需手动指定正交特征向量数量,最大为9;
oplsda = opls(dat,group$gender, predI = 1,orthoI = NA,permI = 200,crossvalI = 12)#permI常用200,crossvalI代表k着验证法中的k,根据样本数量确定
模型运行结果如上图所示。R2X(cum) 代表模型对X变量的累计解释度,其值为0.275;R2Y(cum)模型对Y变量的累计解释度,其值为0.73;Q2(cum) 代表模型的可预测性,其值为0.617;RMSEE 代表 根均方误差估计(Root Mean Square Error of Estimate),其值为0.262,它是衡量模型预测值与实际值之间差异的指标,RMSEE 的值越小,表示模型的预测效果越好;pre表示模型预测主成分个数,1;ort代表模型正交成分个数,2;pR2Y,pQ2分别代表其显著性检验结果,0.005。
#提取模型解释度
oplsda.eig <- data.frame(oplsda@modelDF)
#导出数据
file <- paste(oplspath,group1,"_model_statistic.csv",sep="")
write.csv(oplsda.eig,file,quote =T)
#提取检验数据
permutation_data <- data.frame(oplsda@suppLs[["permMN"]])
#检验数据会有201行,第一行为模型结果,剩下的200次为随机检验结果
#添加行名,使其看起来更加直观一些
permutation_name <- paste("permutation",1:200,sep = "")
rownames(permutation_data) <- c("model",permutation_name)
#导出数据
file <- paste(oplspath,group1,"_model_permutation.csv",sep="")
write.csv(permutation_data,file,quote =T)
#列名含义同模型含义解释,sim代表随机置换的Y变量与原模型Y变量的相似度,即置换保留度,1代表与原模型保持一致
#提取模型中样本得分
sample.score = oplsda@scoreMN %>% #得分矩阵
as.data.frame() %>%
mutate(Group = group$gender,
o1=oplsda@orthoScoreMN[,"o1"],
o2=oplsda@orthoScoreMN[,"o2"]) #正交矩阵
#oplsda@scoreMN代表样本预测主成分得分;oplsda@orthoScoreMN代表样本正交成分得分
head(sample.score)#查看
file <- paste(oplspath,group1,"_sample_score.csv",sep="")
write.csv(sample.score,file,quote =T)
ovip <- data.frame( VIP_o1 = getVipVn(oplsda,orthoL = TRUE),
VIP_p1 = getVipVn(oplsda,orthoL = FALSE))
file <- paste(oplspath,group1,"_VIP_score.csv",sep="")
write.csv(ovip,file,quote =T)
#根据vip筛选差异微生物
ovip_select <- ovip %>% filter(VIP_p1 >= 1) %>% # 以VIP>=1为阈值进行筛选。
arrange(VIP_p1) %>%
#根据VIP值升序排序 ,使可视化时y轴从上到下递减 ,ggplot2中非连续变量默认y轴呈现顺序为从下到上,即第一行的样品在y轴最下方,最后一行的样品在y轴最上方
mutate(feature = factor(rownames(.),levels = rownames(.)))
ovip_select %>% head()
dim(ovip_select)#vip大于1的物质个数
#top30,选择可视化展示数量,20即为nrow(ovip_select)-19,数量较少时可选择全部展示
vip_plot <- ovip_select[(nrow(ovip_select)-29):nrow(ovip_select),]
#导出数据
file <- paste(oplspath,group1,"_VIP_difference_top30.csv",sep="")
write.csv(vip_plot,file,quote =T)
#将检验数据转换为长数据框,便于可视化,借助于reshape2包中melt函数
permutation_plot <- melt(permutation_data[,c("R2Y.cum.","Q2.cum.","sim")],id="sim")
#可视化
#得分图可视化,逐行解释
p1 <- ggplot(sample.score, aes(p1, o1, color = Group)) +#指定绘图对象,x轴,y轴和颜色分类
geom_hline(yintercept = 0, linetype = 'dashed', size = 0.5) + #添加横向等值线,y=0,线型为虚线,线的尺寸为0.5即粗细,值越大线越粗
geom_vline(xintercept = 0, linetype = 'dashed', size = 0.5) + #添加纵向等值线,x=0,线型为虚线,线的尺寸为0.5即粗细,值越大线越粗
geom_point(alpha = 0.6, size = 2)+#添加散点,透明度为0.6,大小为2,默认为实心圆,修改形状可通过shape参数
xlab(paste("T score[1] (",round(oplsda.eig["p1",1]*100,2), "%)",sep=""))+ #指定x轴标题内容——预测主成分加其组间解释度百分比
ylab(paste("Orthogonal T score[1] (",round(oplsda.eig["o1",1]*100,2), "%)",sep=""))+##指定y轴标题内容——正交成分轴1加其组内解释度百分比
ggtitle(paste(group1," OPLS-DA Scores Plot",sep = ""))+##指定标题,空格是为其刚好与图形尺寸匹配
stat_ellipse(level = 0.95, linetype = 'solid',
size = 1,alpha=0.7) + #添加置信椭圆,置信水平0.95,实线,尺寸为1,透明度为0.7,默认不对椭圆进行填充,可通过fill参数设置
scale_color_manual(values=c("F"="#E41A1C","M"="#377EB8"))+
#这里指定类别颜色,需根据实际情况将标签调整为自己的组别标签,例如,组别为A,B,则调整为values=c("A"="#E41A1C","B"="#377EB8")
theme_bw() +
theme(panel.border=element_rect(colour= "black",fill=NA,size=0.75),#图框外边线颜色为黑色,粗细为0.75,不填充
panel.grid.minor=element_blank(),#隐藏图中的次要网格线
panel.background=element_blank(),#绘图背景为空白
plot.background=element_blank(),#全部背景为空白
plot.title =element_text(face="bold",color="black",size=12,vjust=0.7,hjust = 0.5),
#标题文字设置,face调整字体格式,”bold”代表加粗,颜色为黑,大小为12,垂直对齐为0.7,值越大距离图框越远,水平方向上居中对齐 ,值越大越靠右
axis.title.x = element_text(size = 11,colour = "black",face = "bold",vjust = 1),#x轴标题设置
axis.title.y = element_text(size = 11,colour = "black",face = "bold",hjust = 0.5,vjust = 1.6),#y轴标题设置
axis.text=element_text(color="black",size=10,vjust = 0.5,hjust = 0.5),#坐标轴标签设置
legend.background=element_blank(),#图例区域背景设置
legend.title=element_blank(),#图例标题设置
legend.text=element_text(face="bold",color="black",size=10),#图例标签设置
legend.spacing.x=unit(0.2,"cm"),#图例水平间距设置
legend.key = element_blank(),#图例中形状背景设置
legend.key.size =unit(0.45,"cm"),#图例中形状大小
legend.position=c(0.65,0.99),#图例在图中位置,数值分别代表占据x轴和轴比例
legend.direction = "horizontal",#图例方向
legend.justification = c(0.95,0.84))#图例对齐方式,即图例位置与设定位置之间的关系
#主题设置,theme_bw为黑白主题,即画面背景为空白,theme()是对绘图细节进行调整的重要函数,其中element_blank()代表空白,element_text()为字体设置函数
p1
FileName <- paste(oplspath,group1,"_score.plot", ".pdf", sep = "")
ggsave(FileName, p1,width = 5.5, height =4)
FileName <- paste(oplspath,group1,"_score.plot", ".png", sep = "")
ggsave(FileName, p1,width = 5.5, height =4)
#前30vip棒棒糖
p2 <- ggplot(vip_plot,aes(x= VIP_p1,y=feature))+
geom_segment(aes(x=1,xend=VIP_p1,y=feature,yend=feature),linewidth=0.6)+
geom_point(color="#377EB8",shape = 19,size=2.8)+ theme_bw()+
xlab("VIP scores")+labs(title="Top30 different features scores")+
scale_x_continuous(limits=c(1,max(vip_plot$VIP_p1)+0.1),expand = c(0,0))+
theme(panel.border=element_rect(colour= "black",fill=NA,size=0.75),
panel.grid.minor=element_blank(),
panel.background=element_blank(),
plot.background=element_blank(),
plot.title =element_text(face="bold",color="black",size=12,vjust=0.7,hjust = 0.5),
axis.title.x = element_text(size = 11,colour = "black",face = "bold",vjust = 1),
axis.title.y = element_text(size = 11,colour = "black",face = "bold",hjust = 0.5,vjust = 1.6),
axis.text.y = element_text(color="black",size=7.5,vjust = 0.5,hjust = 1),
)
p2
FileName <- paste(oplspath,group1,"_TOP30_VIP.plot", ".pdf", sep = "")
ggsave(FileName, p2,width = 9, height =6)
FileName <- paste(oplspath,group1,"_TOP30_VIP.plot", ".png", sep = "")
ggsave(FileName, p2,width = 9, height =6)
#检验结果可视化
#拟合直线颜色
mycol <- rgb(95/255,95/255,95/255)
#R2,Q2截距,斜率,图例标签
IR2 <- summary(lm(value~sim,data = permutation_plot[which(permutation_plot$variable=="R2Y.cum."),]))$coefficients["(Intercept)","Estimate"]
IQ2 <- summary(lm(value~sim,data = permutation_plot[which(permutation_plot$variable=="Q2.cum."),]))$coefficients["(Intercept)","Estimate"]
labelR2 <- expression(paste("R"^2, "Y(cum)",sep = ""))#expression,可表达数学公式
labelQ2 <- expression(paste("Q"^2,"(cum)",sep = ""))##expression,可表达数学公式
#出图
p3 <- ggplot(permutation_plot,aes(x= sim,y=value,color=variable,shape = variable))+
theme_bw()+geom_point(alpha=0.8,size=1.5)+
scale_color_manual(values=c("Q2.cum."="#377EB8","R2Y.cum."="#E41A1C"),
labels=c(labelR2,labelQ2))+
scale_shape_manual(values=c(19,17),labels=c(labelR2,labelQ2))+
geom_segment(x=0,y=IR2,xend=1,yend=permutation_data["model","R2Y.cum."],linetype="dashed",color=mycol,linewidth=0.4)+
geom_segment(x=0,y=IQ2,xend=1,yend=permutation_data["model","Q2.cum."],linetype="dashed",color=mycol,linewidth=0.4)+
scale_y_continuous(limits = c(min(permutation_plot$value)-0.1,max(permutation_plot$value)+0.1),expand = c(0,0),
breaks = seq(round(min(permutation_plot$value),1),round(max(permutation_plot$value),1),0.4))+
ggtitle(paste(group1," OPLS-DA permutation",sep = ""))+
labs(x="Correlation Coefficient",y = expression(paste("R"^2, "Y(cum) and ","Q"^2,"(cum)",sep = "")),
subtitle = bquote(Intercepts ~ ":" ~ R^2 ~ Y(cum) ~ "=" ~ .(round(IR2, 2))~"," ~ Q^2 ~ "(cum)"~"=" ~ .(round(IQ2, 2))))+
theme(panel.border=element_rect(colour= "black",fill=NA,size=0.75),
panel.grid.minor=element_blank(),
panel.background=element_blank(),
plot.background=element_blank(),
plot.title =element_text(face="bold",color="black",size=12,vjust=0.7,hjust = 0.5),
plot.subtitle = element_text(face="bold",color="black",size=11,vjust=0.7,hjust = 0.5),
axis.title.x = element_text(size = 11,colour = "black",face = "bold",vjust = 1),
axis.title.y = element_text(size = 11,colour = "black",face = "bold",hjust = 0.5,vjust = 1),
axis.text = element_text(color="black",size=10,vjust = 0.5,hjust = 0.5),
legend.background=element_blank(),
legend.title=element_blank(),
legend.text=element_text(color="black",size=10,face="bold",vjust = 0.5,hjust = 0.5),
legend.spacing.x=unit(0,"cm"),
legend.key = element_blank(),
legend.key.size =unit(0.6,"cm"),
legend.position=c(0.99,0.01),
legend.direction = "vertical",
legend.justification = c(0.95,0))+
geom_hline(yintercept=0,linetype=1,color = 'black',size = 0.5) +
geom_vline(xintercept=0,linetype=1,color = 'black',size = 0.5)
p3
FileName <- paste(oplspath,group1,"_permutation.plot", ".pdf", sep = "")
ggsave(FileName, p3,width = 5.5, height =4.6)
FileName <- paste(oplspath,group1,"_permutation.plot", ".png", sep = "")
ggsave(FileName, p3,width = 5.5, height =4.6)
作者:思农生信团队-汪志祥