library(Seurat)
library(monocle3)
library(tidyverse)
library(patchwork)
library(ggplot2)
load("test.Rdata")
dim(sce) # [1] 18103 17278
##数据集筛选
VlnPlot(sce, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
VlnPlot(sce, features = c("nCount_RNA","nFeature_RNA", "percent.mt","percent.rb"), ncol = 4)
sce <- sce[!grepl("MALAT1", rownames(sce)), ] #过滤掉异常基因
sce <- sce[!grepl("^MT-", rownames(sce)), ] #过滤掉线粒体基因
sce <- sce[!grepl('^RP[SL]', rownames(sce)), ] #过滤掉核糖体基因
sce <- sce[!grepl("^HB[^(P)]", rownames(sce)), ] #过滤掉红细胞基因
dim(sce) #[1] 17978 17278
## 将seurat对象转换为monocle对象
scRNA$celltype = Idents(scRNA)
levels(Idents(scRNA)) #细胞类型
scRNA = subset(scRNA,idents = c("CD4 T Memory", "CD4 T CXCR3+", "CD4 T CCR6+", "CD4 T Naive", "Treg" ))
table(Idents(scRNA))
## 构建cds数据结构(monocle3数据对象)
data <- GetAssayData(scRNA, assay = 'RNA', layer = 'counts')
cell_metadata <- scRNA@meta.data
gene_annotation <- data.frame(gene_short_name = rownames(scRNA))
rownames(gene_annotation) <- rownames(data)
cds <- new_cell_data_set(data,
cell_metadata = cell_metadata,
gene_metadata = gene_annotation)
cds <- preprocess_cds(cds, num_dim = 100) # 数据标准化
plot_pc_variance_explained(cds) # 确认设定的num_dim数是否足够代表主要变异
cds <- reduce_dimension(cds, preprocess_method = "PCA") # 降维,PCA或者LSI
plot_cells(cds, reduction_method="UMAP", color_cells_by="celltype") + ggtitle('cds.umap')
##从seurat导入整合过的umap坐标
cds.embed <- cds@int_colData$reducedDims$UMAP
int.embed <- Embeddings(scRNA, reduction = "umap")
int.embed <- int.embed[rownames(cds.embed),]
cds@int_colData$reducedDims$UMAP <- int.embed
plot_cells(cds, reduction_method="UMAP", color_cells_by="celltype") + ggtitle('int.umap')
cds <- cluster_cells(cds)
p1 <- plot_cells(cds, show_trajectory_graph = FALSE) + ggtitle("label by clusterID")
p2 <- plot_cells(cds, color_cells_by = "partition", show_trajectory_graph = FALSE) +
ggtitle("label by partitionID")
wrap_plots(p1, p2)
cds <- learn_graph(cds)
plot_cells(cds, label_groups_by_cluster = FALSE, label_leaves = FALSE, label_branch_points = FALSE)
plot_cells(cds, color_cells_by = "celltype",
label_groups_by_cluster=FALSE, label_leaves=FALSE, label_branch_points=FALSE)
plot_cells(cds,color_cells_by = "celltype")
plot_cells(cds, genes=c("CCR6","CXCR3", "TP53"))
cds <- order_cells(cds)
plot_cells(cds,color_cells_by = "celltype",
label_cell_groups=FALSE, label_leaves=TRUE, label_branch_points=TRUE, graph_label_size=1.5)
plot_cells(cds, color_cells_by = "pseudotime",
label_cell_groups=FALSE, label_leaves=FALSE, label_branch_points=FALSE, graph_label_size=1.5)
第四步,拟时轨迹差异分析。
## 拟时轨迹差异分析
#graph_test分析最重要的结果是莫兰指数(morans_I),其值在-1至1之间,0代表此基因没有
#空间共表达效应,1代表此基因在空间距离相近的细胞中表达值高度相似。
Track_genes <- graph_test(cds, neighbor_graph="principal_graph", cores=10)
#挑选top10画图展示
Track_genes_sig <- Track_genes %>% top_n(n=10, morans_I) %>%
pull(gene_short_name) %>% as.character()
#基因表达趋势图
plot_genes_in_pseudotime(cds[Track_genes_sig,], color_cells_by="celltype",
min_expr=0.5, ncol = 2)
#FeaturePlot图
plot_cells(cds, genes=Track_genes_sig, show_trajectory_graph=FALSE,
label_cell_groups=FALSE, label_leaves=FALSE)
##寻找共表达模块
genelist <- pull(Track_genes, gene_short_name) %>% as.character()
gene_module <- find_gene_modules(cds[genelist,], resolution=1e-2, cores = 10)
cell_group <- tibble::tibble(cell=row.names(colData(cds)),
cell_group=colData(cds)$celltype)
agg_mat <- aggregate_gene_expression(cds, gene_module, cell_group)
row.names(agg_mat) <- stringr::str_c("Module ", row.names(agg_mat))
pheatmap::pheatmap(agg_mat, scale="column", clustering_method="ward.D2")
https://blog.csdn.net/bioyigene/article/details/130343790
https://mp.weixin.qq.com/s/siD_cLk4VGD_L-UEIQkm0g https://zhuanlan.zhihu.com/p/719551501 https://blog.csdn.net/songyi10/article/details/126454327