单细胞轨迹分析,monocle3!

学术   2025-01-13 00:06   上海  
从英语单词的结构上说,Monocle是mono(单个)+cle(class),可译为单眼镜片在单细胞数据分析时,主要用这种算法进行单细胞拟时轨迹发育分析Monocle3是目前最新版本,并针对发育生物学数据分析做了改进:发育轨迹的研究流程优化;支持UMAP推断发育轨迹,无缝衔接到reduceDimension函数,或使用独立的UMAP函数等。除了发育轨迹以外,Monocle3算法在聚类分群、差异分析方面表现不错。
我们用CD4 T 细胞的单细胞数据进行拟时发育轨迹分析的实操演示。因为monocle3是跟seurat数据无缝衔接的,所以可以直接进行seurat数据对象向monocle3数据对象转换。其实,monocle3处理数据的最大难点可能是monocle3包的安装,我们就不做解读了。
第一步,首先,加载 R包和数据
library(Seurat)library(monocle3)library(tidyverse)library(patchwork)library(ggplot2)
load("test.Rdata")dim(sce) # [1] 18103 17278
##数据集筛选VlnPlot(sce, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)VlnPlot(sce, features = c("nCount_RNA","nFeature_RNA", "percent.mt","percent.rb"), ncol = 4)
sce <- sce[!grepl("MALAT1", rownames(sce)), ] #过滤掉异常基因sce <- sce[!grepl("^MT-", rownames(sce)), ] #过滤掉线粒体基因sce <- sce[!grepl('^RP[SL]', rownames(sce)), ] #过滤掉核糖体基因sce <- sce[!grepl("^HB[^(P)]", rownames(sce)), ] #过滤掉红细胞基因
dim(sce) #[1] 17978 17278

第二步,将seurat对象转换为monocle对象,构建cds数据结构。
## 将seurat对象转换为monocle对象scRNA$celltype = Idents(scRNA)levels(Idents(scRNA))         #细胞类型scRNA = subset(scRNA,idents = c("CD4 T Memory", "CD4 T CXCR3+", "CD4 T CCR6+", "CD4 T Naive", "Treg" ))table(Idents(scRNA))
## 构建cds数据结构(monocle3数据对象)data <- GetAssayData(scRNA, assay = 'RNA', layer = 'counts')cell_metadata <- scRNA@meta.datagene_annotation <- data.frame(gene_short_name = rownames(scRNA))rownames(gene_annotation) <- rownames(data)cds <- new_cell_data_set(data, cell_metadata = cell_metadata, gene_metadata = gene_annotation)
cds <- preprocess_cds(cds, num_dim = 100) # 数据标准化plot_pc_variance_explained(cds) # 确认设定的num_dim数是否足够代表主要变异cds <- reduce_dimension(cds, preprocess_method = "PCA") # 降维,PCA或者LSIplot_cells(cds, reduction_method="UMAP", color_cells_by="celltype") + ggtitle('cds.umap')
##从seurat导入整合过的umap坐标cds.embed <- cds@int_colData$reducedDims$UMAPint.embed <- Embeddings(scRNA, reduction = "umap")int.embed <- int.embed[rownames(cds.embed),]cds@int_colData$reducedDims$UMAP <- int.embedplot_cells(cds, reduction_method="UMAP", color_cells_by="celltype") + ggtitle('int.umap')
第三步,聚类分群,绘制拟时轨迹发育图。
## Monocle3聚类分群cds <- cluster_cells(cds)p1 <- plot_cells(cds, show_trajectory_graph = FALSE) + ggtitle("label by clusterID")p2 <- plot_cells(cds, color_cells_by = "partition", show_trajectory_graph = FALSE) +   ggtitle("label by partitionID")wrap_plots(p1, p2)
## 识别轨迹cds <- learn_graph(cds)plot_cells(cds, label_groups_by_cluster = FALSE, label_leaves = FALSE, label_branch_points = FALSE)plot_cells(cds, color_cells_by = "celltype", # color_cells_by可以修改 label_groups_by_cluster=FALSE, label_leaves=FALSE, label_branch_points=FALSE)
plot_cells(cds,color_cells_by = "celltype") # color_cells_by 设置展示分组情况
plot_cells(cds, genes=c("CCR6","CXCR3", "TP53")) # genes参数可视化基因的变化
# 用order_cells进行起点排序cds <- order_cells(cds)plot_cells(cds,color_cells_by = "celltype", label_cell_groups=FALSE, label_leaves=TRUE, label_branch_points=TRUE, graph_label_size=1.5)
plot_cells(cds, color_cells_by = "pseudotime",            label_cell_groups=FALSE, label_leaves=FALSE, label_branch_points=FALSE, graph_label_size=1.5)

第四步,拟时轨迹差异分析。

## 拟时轨迹差异分析#graph_test分析最重要的结果是莫兰指数(morans_I),其值在-1至1之间,0代表此基因没有#空间共表达效应,1代表此基因在空间距离相近的细胞中表达值高度相似。Track_genes <- graph_test(cds, neighbor_graph="principal_graph", cores=10)
#挑选top10画图展示Track_genes_sig <- Track_genes %>% top_n(n=10, morans_I) %>% pull(gene_short_name) %>% as.character()
#基因表达趋势图plot_genes_in_pseudotime(cds[Track_genes_sig,], color_cells_by="celltype", min_expr=0.5, ncol = 2)#FeaturePlot图plot_cells(cds, genes=Track_genes_sig, show_trajectory_graph=FALSE, label_cell_groups=FALSE, label_leaves=FALSE)
##寻找共表达模块genelist <- pull(Track_genes, gene_short_name) %>% as.character()gene_module <- find_gene_modules(cds[genelist,], resolution=1e-2, cores = 10)cell_group <- tibble::tibble(cell=row.names(colData(cds)), cell_group=colData(cds)$celltype)agg_mat <- aggregate_gene_expression(cds, gene_module, cell_group)row.names(agg_mat) <- stringr::str_c("Module ", row.names(agg_mat))pheatmap::pheatmap(agg_mat, scale="column", clustering_method="ward.D2")

  1. https://blog.csdn.net/bioyigene/article/details/130343790

  2. https://mp.weixin.qq.com/s/siD_cLk4VGD_L-UEIQkm0g
  3. https://zhuanlan.zhihu.com/p/719551501
  4. https://blog.csdn.net/songyi10/article/details/126454327

芒果师兄
1.生信技能和基因编辑。2.论文发表和基金写作。3. 健康管理和医学科研资讯。4.幸福之路,读书,音乐和娱乐。
 最新文章