简介
加权分位数和(Weighted quantile sum,WQS)回归模型是分析混合物暴露与结局关联的常用模型。WQS回归模型考虑到每种化学物质与结局指标的关联可能具有不同的方向和程度,对混合暴露与结局之间的关系分别进行正向和负向的评估。该模型可将数据随机拆分成训练数据集和验证数据集,以计算WQS指数与结局的关联以及所有化合物的权重。同时,它引入bootstrap方法以增强WQS回归模型对关键因子预测的稳定性。
示例
本示例以WQS的正向关联为例。示例数据如下:涉及的变量包括10种人口学基本特征和临床特征(作为混杂因素,在data_p数据集第4~13列),38种环境化学物(作为自变量,在data_p数据集第14~51列)和获卵数(作为因变量,在data_p数据集第52列)。
2.1 安装并加载相应的包
install.packages("gWQS")
library(gWQS)
library(ggplot2)
2.2 将多分类变量转换成哑变量
###data_p为原始数据
###将多分类变量转换成哑变量(由于在同一研究中贝叶斯核机回归模型将多分类变量转换成哑变量,为保持一致,WQS模型中的多分类变量也转换成哑变量)
a<- data_p[,4:13] ### 10个混杂因素,其中,Education、Income和Infertility_diagnosis为多分类变量
a$Education<-as.factor(a$Education)
a$Income<-as.factor(a$Income)
a$Infertility_diagnosis<-as.factor(a$Infertility_diagnosis)
Education_dum<-model.matrix(~Education,data=a)
Income_dum<-model.matrix(~Income,data=a)
Infertility_dum<-model.matrix(~Infertility_diagnosis,data=a)
head(cbind(Education_dum,a$Education)) ###检查多分类变量变成哑变量
###data_use将多分类混杂变量替换为哑变量
data_use<-data.frame(Age=a$Age,BMI=a$BMI,Parity=a$Parity,Smoking=a$Smoking,Passive_smoking=a$Passive_smoking,Alcohol=a$Alcohol,
Education_dum[,2:3],Income_dum[,2:3],Infertility_dum[,2:4],Insemination=a$Insemination,
data_p[14:52],check.names = F)
exposure_name<-names(data_use[,15:52]) ###暴露因素
covars_name<-names(data_use[,1:14]) ###混杂因素
2.3 构建WQS模型
###正向关联
###本研究中,设置q=4表示将暴露划分为四分位数;validation=0.6表示随机抽取数据集中的 60% 作为验证集,剩下的40% 作为训练集;
b=10000表示随机抽样10000次;b1_pos = TRUE表示估计β1为正的权重;b1_constr =FALSE表示使用优化算法对权重进行估计时不进行限制;
family="poisson"表示采用泊松分布进行拟合;seed=1000为随机种子数。
gwqs11<-gwqs(Retrieved_oocytes~wqs+Age+BMI+Parity+Smoking+Passive_smoking+Alcohol+Education2+Education3+Income2+Income3+
Infertility_diagnosis2+Infertility_diagnosis3+Infertility_diagnosis4+Insemination, mix_name =exposure_name,
data=data_use, q=4, validation = 0.6, b=10000, b1_pos = TRUE, b1_constr =FALSE, family="poisson",
seed=1000, plots=TRUE, tables=TRUE)
###计算权重
w_ord <- order(gwqs11$final_weights$mean_weight)
mean_weight <- gwqs11$final_weights$mean_weight[w_ord]
mix_name <- factor(gwqs11$final_weights$mix_name[w_ord],
levels = gwqs11$final_weights$mix_name[w_ord])
data_plot11 <- data.frame(mean_weight, mix_name)
###权重图
wqs11<-ggplot(data_plot11, aes(x = mix_name, y = mean_weight, fill = mix_name)) +
geom_bar(stat = "identity", color = "black") + theme_bw()+ggtitle("A") +
theme(axis.ticks = element_blank(),
axis.title = element_blank(),
axis.text.x = element_text(color='black'),
legend.position = "none") + coord_flip()+
geom_hline(aes(yintercept=0.0263),colour="#990000",linetype="dashed") ###0.0263为化学物个数的倒数,用于划分化学物是否对结局重要
###获卵数和WQS指数的散点图加上拟合曲线
wqs11a<-ggplot(gwqs11$y_wqs_df, aes(wqs, y_adj)) + geom_point() +
stat_smooth(method = "loess", se = FALSE, size = 1.5) + theme_bw()+ggtitle("A")
summary(gwqs11) ###WQS指数
2.4 导出权重
gwqs_weights_tab(gwqs11)
gwqs11$final_weights
a11<-match(exposure_name,gwqs11$final_weights$mix_name)
b11<-gwqs11$final_weights[a11,]
write.csv(b11,"WQS1正向关联.csv") ###导出正向关联权重的文件,如下图
###mix_name为化合物名称,mean_weight为该化合物的权重
2.5 美化权重图
###加载相应的包
library(ggplot2)
library(ggpubr)
library(eoffice)
WQS_weight$Group<-factor(WQS_weight$Group,level=c("di-OPEs","mono-PAEs","Parabens","CPs","Bisphenols","BPs","SPAs"))
WQS_weight$label<-factor(WQS_weight$label,level=c("DBP", "BCIPP", "DPHP", "DCP", "BBOEP", "DEP","MMP", "MEP", "MIPrP", "MBP", "MCHP", "MBzP", "MHINP", "MEHP", "MCMHP",
"MEOHP", "MEHHP","MECPP", "MCPP","MeP", "EtP", "PrP","TCS", "TCC","BPA", "BPB", "BPS", "BPAP",
"BP-1", "BP-2", "BP-3", "4-OH-BP","BHA", "BHT", "BHT-OH", "BHT-COOH", "BHT-CHO", "BHT-Q"))
plot.f1p <- ggplot(data = WQS_weight,aes(x=reorder(label,p1)))+
geom_bar(aes(y=p1,fill=Group),stat="identity")+
labs(x=NULL,y="Estimated weights for WQS index")+
facet_grid(.~Group ,scales = "free",space = "free")+
theme_minimal()+ ##删背景网格
theme_bw() ##外框
###继续调整权重图
plot_f1_positive<-plot.f1p+theme(strip.text = element_text(size=12,family="serif",color="black"))+
theme(axis.title.y=element_text(size=12, color="black",family="serif",face = "bold"))+
theme(axis.text.y = element_text(size=10,family="serif",color="black"))+
theme(axis.text.x = element_text(size=10,family="serif",color="black",angle = 90))+theme(text=element_text(family="serif"))+
geom_hline(aes(yintercept=1/38),linetype="dashed")
plot_f1_positive
ggsave(plot_f1_positive, filename = "plot_f1_positive.jpg", width = 15, height = 5)