写在前面的话
单细胞转录组的细胞比例变化分析手段多样,看一看不同分析手段下得出的普遍性生物学结论是否相似?往往试验设计>2组,或是两两比较比较易懂,所以采用了3组进行分析包括miloR丰度的分析。
细胞比例-频率分析
# 样本分组
scRNA_male$year<- factor(scRNA_male$year, levels=c("Young","Adult","Older"))
# 第一种,每个样本某一细胞类型占所有样本这一细胞类型的比例
cell <- data.frame(table(scRNA_male$celltype,scRNA_male$orig.ident,
scRNA_male@meta.data[,'year']))
colnames(cell) <- c("Celltype","SampleID","Group","Freq")
cell=cell[which(cell$Freq != 0),]
cell=cell[,c(1,3,4)]
# 同一类型细胞相加
cell$Total <- apply(cell,1,function(x)sum(cell[cell$Celltype == x[1],3]))
cell<- cell %>% mutate(Percentage = round(Freq/Total,3) * 100)
cell=cell[,c(1,2,5)]
# 第二种,每个样本某一细胞类型占这个样本这一细胞类型的比例
library(data.table)
cell = data.table(scRNA_male$celltype,scRNA_male$orig.ident,
scRNA_male@meta.data[,'year'])
colnames(cell) <- c("Celltype","SampleID","Group")
cell <- cell[,.(N=.N),by=c("Celltype","SampleID","Group")]
cell.sum.data <- cell[,{.(NTotal = sum(.SD$N))},by=c("SampleID","Group")]
cell = left_join(cell,cell.sum.data)
cell$freq = cell$N/cell$NTotal *100
火山图
# 因数据有三组,火山图适用于两组比较
cell$Group <- as.character(cell$Group)
cell$Group <- replace(cell$Group,cell$Group==c("Young","Adult"),"Young_Adult")
cell$Group= as.factor(cell$Group)
cell$Group= factor(cell$Group,levels=c("Young_Adult","Older"))
df= do.call(rbind,
lapply(split(cell,cell$Celltype), function(x){
tmp = t.test(x$Percentage ~ x$Group)
return(c(tmp$p.value, tmp$estimate[1]-tmp$estimate[2]))
}))
df = as.data.frame(df)
colnames(df) = c("pval","Difference")
df$threshold = factor(ifelse(df$Difference > 0 ,'Down','UP'))
df$label <- rownames(df)
df %>% ggplot(aes(Difference, -log10(pval)))+
# 横向水平参考线:
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "#999999")+
# 纵向垂直参考线:
geom_vline(xintercept = c(0), linetype = "dashed", color = "#999999")+
# 散点图:
geom_point(aes(size=-log10(pval), color= -log10(pval)),shape=16)+
# 指定颜色渐变模式:
scale_color_gradientn(values = seq(0,1,0.2),
colors = c("#39489f","#39bbec","#f9ed36","#f38466","#b81f25"))+
# 指定散点大小渐变模式:
scale_size_continuous(range = c(1,10))+
# 主题调整和图例位置:
theme_bw()+
theme(panel.grid = element_blank(),
legend.position = c(0.01,0.7),
legend.justification = c(0,1)
)+
# 设置部分图例不显示:
guides(col = guide_colourbar(title = "-Log10(pval)"),
size = "none")+
# 添加标签
geom_text(aes(label=label, color = -log10(pval)), size = 4, vjust = 1.5, hjust=1)+
# 修改坐标轴:
xlab("Difference")+
ylab("-Log10(pval)")
较大影响的Myeloid,Hep类群,P value值有所不同
箱型图
compraisions.list=list(c("Young","Adult"),c("Young","Older"),c("Adult","Older"))
# 指定颜色
group.color = c("Male_Young"= "#4a1486","Male_Adult"= "#7A0177","Male_Older"= "#006837")
library(ggpubr)
p.list.perMcls <- lapply(unique(sort(cell$Celltype)),function(mcls){
text.size = 20
text.angle = 45
text.hjust = 1
legend.position = "none"
p <- ggboxplot(cell[Celltype== mcls ,],x="Group",y="Percentage",
color = "Group", legend="none",title=mcls,
xlab="",ylab="frequency",
add = "jitter",outlier.shape=NA) +
scale_color_manual(values=group.color) +
# 显著性比较
stat_compare_means(label="p.format",comparisons=compraisions.list) +
coord_cartesian(clip="off") # 允许绘图对象超出坐标轴范围时依然可见
+
theme(plot.title = element_text(size = text.size+2,color="black",hjust = 0.5),
axis.ticks = element_line(color = "black"),
axis.title = element_text(size = text.size,color ="black"),
axis.text = element_text(size=text.size,color = "black"),
axis.text.x = element_text(angle = text.angle, hjust = text.hjust ),
legend.position = legend.position,
legend.text = element_text(size= text.size),
legend.title= element_text(size= text.size),
strip.background = element_rect(color="black",size= 1, linetype="solid"),
)
return(p)
})
library(patchwork)
wrap_plots(p.list.perMcls,ncol = 4)
Myeloid显著增多,Hep类群显著降低
## 堆叠比例图
table(scRNA_male$year)#查看各组细胞数
Idents(scRNA_male)<-"celltype"
prop.table(table(Idents(scRNA_male)))
table(Idents(scRNA_male), scRNA_male$year)#各组不同细胞群细胞数
#按列计算各组样本不同细胞群比例
Cellratio <- prop.table(table(Idents(scRNA_male), scRNA_male$year), margin = 2)
Cellratio <- as.data.frame(Cellratio)
#Cellratio重命名
colnames(Cellratio) <- c("Celltype","Group","Ratio")
library(ggalluvial)
library(ggthemes)
Cellratio %>% ggplot(aes(x=Group,y=Ratio,fill=Celltype,stratum=Celltype,alluvium=Celltype))+
geom_col(width = 0.4,color=NA)+
geom_flow(width =0.4, alpha=0.2,knot.pos=0)+
scale_fill_manual(values = use_colors)+
theme_map()+
theme(axis.text.x = element_text(size=20,vjust = 5),
legend.position = "none")
Augur分析-细胞响应
library(Augur)
# 要求metadata数据有label和cell_type
scRNA_male@meta.data$cell_type <- scRNA_male@meta.data$celltype
scRNA_male@meta.data$label <- scRNA_male@meta.data$year
augur = calculate_auc(scRNA_male, n_threads = 8)
# 可视化-UMAP
library(viridis)
plot_umap(augur,scRNA_male,palette = "viridis")
# 可视化-饼图
data <- plot_lollipop(augur)$data
data <- data[order(data$auc, decreasing = TRUE), ]
data$cell_type <- factor(data$cell_type, levels = data$cell_type)
ggplot(data,aes(x=cell_type,y=auc))+
geom_col(fill=use_colors)+ xlab("")+
geom_text(hjust=1,aes(y=0,label=paste(cell_type,"-",round(auc,3))))+
theme(axis.text = element_blank(),
legend.position ="none")+
ylim(0,1)+ theme_void()+
coord_polar(theta = "y")
T/NKT和Fibro细胞亚群受试验设计最先响应
miloR细胞丰度分析
library(miloR)
#miloR输入对象是SingleCellExperiment,所以我们是常用的seurat对象的话,转化一下
scRNA_sce <- as.SingleCellExperiment(scRNA_male)
scRNA_sce_milo <- miloR::Milo(scRNA_sce)#milo object构建
scRNA_sce_milo <- miloR::buildGraph(scRNA_sce_milo, k = 40, d = 50)#k是构建graph是需要考虑的邻近的数量,这个参数可自行调整,K值至少为样本数的3到5倍,构建k-nearest neighbour graph
scRNA_sce_milo <- makeNhoods(scRNA_sce_milo,
prop = 0.1, #随机抽取细胞比例,一般情况0.1-0.2足够
k = 40, #建议使用与buildGraph一样的k值
d=50, #KNN降维数,建议使用与buildGraph一样的d值
refined = TRUE)
library(DESeq2) #构建表达矩阵用到ColData函数
scRNA_sce_milo <- countCells(scRNA_sce_milo, meta.data = data.frame(colData(scRNA_sce_milo)), samples="orig.ident") # make the nhood X sample counts matrix
library(ggplot2)
plotNhoodSizeHist(scRNA_sce_milo)+scale_x_continuous(limits = c(0, 500), breaks = seq(0, 500, 40)) #看K值是否合适
K值选的过小,每个neighbourhood囊括的细胞数越多、Neighbourhood的数目变少,不利于稀有亚群的挖掘,K值选的过大,每个neighbourhood囊括的细胞数越少、Neighbourhood的数目变多,造成信息的过于冗余偏离生物学意义。选择mean neighbourhood size 不超过兴趣亚群预期含量的10%时的K值。
# 试验设计矩阵的构建
scRNA_sce_design <- data.frame(colData(scRNA_sce_milo))[,c("orig.ident", "year")]
scRNA_sce_design <- distinct(scRNA_sce_design)
rownames(scRNA_sce_design) <- scRNA_sce_design$orig.ident
scRNA_sce_design <- scRNA_sce_design[colnames(nhoodCounts(scRNA_sce_milo)), , drop=FALSE]
# 分析的model构建
model <- model.matrix(~ 0 + year, data=scRNA_sce_design)
ave.contrast <- c("yearOlder-(yearYoung+yearAdult)/2")
mod.constrast <- makeContrasts(contrasts=ave.contrast, levels=model)
# 计算cell距离,差异分析
scRNA_sce_milo <- calcNhoodDistance(scRNA_sce_milo, d=50)
da_results <- testNhoods(scRNA_sce_milo, design = ~ 0 + year, design.df = scRNA_sce_design, model.contrasts = ave.contrast, fdr.weighting="graph-overlap")
da_results <- annotateNhoods(scRNA_sce_milo, da_results, coldata_col = "celltype")
# 查看邻域的同质性如何
ggplot(da_results, aes(celltype_fraction)) + geom_histogram(bins=50)
# 虽然邻域大部分是同质的,但我们可以为 celltype_fraction 定义一个阈值,以排除混合细胞类型的邻域
da_results$celltype <- ifelse(da_results$celltype_fraction < 0.7, "Mixed", da_results$celltype)
# 可视化-蜂群图
library(scales)
plotDAbeeswarm(da_results, group.by = "celltype") +
scale_color_gradient2(low="#070091",
mid="lightgrey",
high="#910000",
limits=c(-5,5),
oob = squish) +
labs(x="", y="Log2 Fold Change") +
theme_bw(base_size=10)+
theme(axis.text = element_text(colour = 'black'))
# 可视化-邻域图
scRNA_sce_milo <- buildNhoodGraph(scRNA_sce_milo)
plotNhoodGraphDA(scRNA_sce_milo, da_results,layout="UMAP", alpha=0.1) +
scale_fill_gradient2(low="#070091",#修改颜色
mid="lightgrey",
high="#910000",
name="log2FC",
limits=c(-5,5),
oob = squish)