reactions.tsv 惩罚得分矩阵文件rxn_md.csv 反应注释文件
读取文件
library(data.table)
library(dplyr)
library(tibble)
library(ggplot2)
library(cowplot)
library(ggrepel)
reactions <- fread('./compass/reactions.tsv',fill = TRUE) %>% column_to_rownames('V1')
cell_metadata <- read.csv('./compass/cellmeta.csv', header = TRUE, row.names = 1)
reaction_metadata = fread('./compass/rxn.csv')
定义函数
# Function1:将原始惩罚矩阵输出转换为每个反应的得分,值越大表示活动越活跃
get_reaction_consistencies <- function(compass_reaction_scores, min_range = 1e-3) {
df <- -log(compass_reaction_scores + 1)# 计算负对数
valid_rows <- apply(df, 1, function(x) max(x) - min(x) >= min_range)# 筛选符合条件的行
df <- df[valid_rows, ]
df <- df - min(df, na.rm = TRUE)# 减去最小值
return(df)
}
# Function2:效应和检验
wilcox_test_compass <- function(reaction_consistencies,
cell_metadata,
group,
group_1, group_2){
#提取两组data
c1 = cell_metadata[cell_metadata[,group] == group_1, ]
c2 = cell_metadata[cell_metadata[,group] == group_2, ]
t1 = reaction_consistencies[,rownames(c1)]
t2 = reaction_consistencies[,rownames(c2)]
#定义cohens_d
cohens_d <- function(x, y) {
n_x <- length(x)
n_y <- length(y)
pooled_std <- sqrt(((n_x - 1) * var(x) + (n_y - 1) * var(y)) / (n_x + n_y - 2))
return((mean(x) - mean(y)) / pooled_std)
}
#显著性检验
wilcox_tests <- lapply(rownames(t1), function(x){
wilcox <- wilcox.test(as.numeric(t1[x,]), as.numeric(t2[x,]), alternative="two.sided")
cohen_d <- cohens_d(as.numeric(t1[x,]), as.numeric(t2[x,]))
wilcox = c(row=x,
wilcox_stat=as.numeric(wilcox$statistic),
wilcox_pval=wilcox$p.value,
cohens_d=cohen_d,
adjusted_pval=wilcox$p.value,
metadata_r_id=x)
})
wilcox_results <- do.call('rbind', wilcox_tests) %>%
as.data.frame() %>%
column_to_rownames('row')
wilcox_results$metadata_r_id <- sub("_.*$", "", wilcox_results$metadata_r_id)
wilcox_results$adjusted_pval <- p.adjust(wilcox_results$wilcox_pval, method = 'fdr')
return(wilcox_results)
}
运行结果
reactions[is.na(reactions)] <- 0
reaction_consistencies = get_reaction_consistencies(reactions)
reaction_consistencies = reaction_consistencies[apply(reaction_consistencies, 1, var) > 0,]
# 检验结果 这边取内皮细胞作为演示,年轻和年老的比较
Endo <- cell_metadata[cell_metadata$celltype == "Endo", ]
reaction_consistencies_H <- reaction_consistencies[,rownames(Endo)]
wilcox_results = wilcox_test_compass(reaction_consistencies = reaction_consistencies_H,
cell_metadata = Endo,
group = 'group',
group_1 = "Young",
group_2 = "Older")
# 整合结果 注释
colnames(reaction_metadata)[1] <- "metadata_r_id"
merge_compass_res <- left_join(wilcox_results,
reaction_metadata,
by = "metadata_r_id")
rownames(merge_compass_res) <- rownames(wilcox_results)
#过滤
merge_compass_res_filter <- filter(merge_compass_res, confidence %in% c(0,4))
merge_compass_res_filter$EC_number <- gsub("N/A", NA, merge_compass_res_filter$EC_number)
merge_compass_res_filter <- merge_compass_res_filter[!is.na(merge_compass_res_filter$EC_number),]
可视化
ggplot()+
geom_point(data = merge_compass_res_filter[merge_compass_res_filter$cohens_d>0,], aes(cohens_d, subsystem),
color="red", size=2)+
geom_point(data = merge_compass_res_filter[merge_compass_res_filter$cohens_d<0,], aes(cohens_d, subsystem),
color="blue", size=2)
# 由上图可以看到,脂肪酸氧化差异较为显著并且统一,以此进行火山图展示
data = subset(merge_compass_res_filter, subsystem == "Fatty acid oxidation")
data$cohens_d <- round(as.numeric(data$cohens_d),3)
data_sig = data[data$adjusted_pval <= 0.05,]
data_sig['reaction_name']
p = ggplot(data, aes(cohens_d, -log10(as.numeric(adjusted_pval))))+
geom_point(color="#BF1E2E", size=2)+
xlim(-2,2)+
labs(x = "Cohen's d",y = "-log10 (Wilcoxon-adjusted p)", title = "Fatty acid oxidation")+
geom_hline(aes(yintercept=(-log10(0.05))), color = "black", linetype="dashed", size=0.8) +#添加横线
geom_vline(aes(xintercept=0), color = "black", linetype="dashed", size=0.8)+
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_rect(size=1, colour = "black")) +
theme(axis.title =element_text(size = 14),axis.text =element_text(size = 8, color = "black"),
plot.title =element_text(hjust = 0.5, size = 16)) +
theme(legend.position = "none") + #不要放置legend
geom_text_repel(data=data_sig, aes(label=reaction_name), color="black", size=4, fontface="italic", #标签设置,颜色、大小、字体、指示箭头设置
arrow = arrow(ends="first", length = unit(0.01, "npc")), box.padding = 0.2,
point.padding = 0.3, segment.color = 'black', segment.size = 0.3, force = 1, max.iter = 3e3,
max.overlaps = Inf)
ggdraw(xlim = c(0, 1), ylim = c(-0.1,1.1))+
draw_plot(p,x = 0, y =0) +
draw_line(x = c(0.6,0.9),
y = c(-0.02,-0.02),
lineend = "round",
size =1, col = '#348C73', # 添加箭头
arrow=arrow(angle = 15,
length = unit(0.1,"inches"),
type = "closed"))+
draw_line(x = c(0.2,0.45),
y = c(-0.02,-0.02),
lineend = "round",
size =1, col = '#E92E87', # 添加箭头
arrow=arrow(angle = 15,
length = unit(0.1,"inches"),
type = "closed",
ends="first"))+
draw_text(text = "Young", size = 10,
x = 0.95, y = -0.02,
color="black",fontface = "italic")+
draw_text(text = "Older", size = 10,
x = 0.15, y = -0.02,
color="black",fontface = "italic")