拟时序| monocle3+velo.R+scvelo呈现

文摘科学 2023-06-16 08:32 浙江

写在前面的话

拟时序分析已经是单细胞和空间转录组中的应用文章中的必需分析，干货记录分享，简单易懂！

提取亚群再细分

library(Seurat)
library(monocle3)
library(tidyverse)
library(patchwork)
## 提取只含T细胞的子集
scRNA.T <- subset(scRNA,idents="Tc")
## 重新降维聚类
scRNA.T <- NormalizeData(scRNA.T, normalization.method = "LogNormalize", scale.factor = 1e4) 
scRNA.T <- FindVariableFeatures(scRNA.T, selection.method = 'vst', nfeatures = 3000)
scRNA.T <- ScaleData(scRNA.T,rownames(scRNA.T))
scRNA.T <- RunPCA(scRNA.T, features = VariableFeatures(object = scRNA.T)) 
library(harmony)
scRNA.T <- RunHarmony(scRNA.T, "orig.ident")
scRNA.T <- RunUMAP(scRNA.T,dims = 1:30, 
              reduction = "harmony")
scRNA.T <- FindNeighbors(scRNA.T, reduction = "harmony",
                    dims = 1:30) 
for (res in c(0.01, 0.05, 0.1, 0.2, 0.3, 0.5,0.8,1)) {
  scRNA.T=FindClusters(scRNA.T, #graph.name = "CCA_snn", 
                  resolution = res, algorithm = 1)
}
apply(scRNA.T@meta.data[,grep("RNA_snn",colnames(scRNA.T@meta.data))],2,table)
Idents(scRNA.T)<- "RNA_snn_res.0.1"
p1 <- DimPlot(scRNA.T, reduction = "umap",
        label = T,label.box = T)+xlim(-15,15)

monocle3

## monocle3 轨迹分析
# ##创建CDS对象并预处理数据
data <- GetAssayData(scRNA.T, assay = 'RNA', slot = 'counts')
cell_metadata <- scRNA.T@meta.data
gene_annotation <- data.frame(gene_short_name = rownames(data))
rownames(gene_annotation) <- rownames(data)
cds <- new_cell_data_set(data,
                         cell_metadata = cell_metadata,
                         gene_metadata = gene_annotation)
#preprocess_cds函数相当于seurat中NormalizeData+ScaleData+RunPCA
cds <- preprocess_cds(cds, num_dim = 50)
##从seurat导入整合过的umap坐标
cds.embed <- cds@int_colData$reducedDims$UMAP
int.embed <- Embeddings(scRNA.T, reduction = "umap")
int.embed <- int.embed[rownames(cds.embed),]
cds@int_colData$reducedDims$UMAP <- int.embed
p2 <- plot_cells(cds, reduction_method="UMAP", color_cells_by="celltype1") + ggtitle('int.umap')
## Monocle3聚类分区
cds <- cluster_cells(cds)
p1 <- plot_cells(cds, show_trajectory_graph = FALSE) + ggtitle("label by clusterID")
p2 <- plot_cells(cds, color_cells_by = "partition", show_trajectory_graph = FALSE) + 
  ggtitle("label by partitionID")
p = wrap_plots(p1, p2)
## 识别轨迹
cds <- learn_graph(cds)
p = plot_cells(cds, label_groups_by_cluster = FALSE, label_leaves = FALSE, 
               label_branch_points = FALSE,color_cells_by = "celltype", group_label_size=4,cell_size=1.5)
# 确定root
cds = order_cells(cds)

plot_cells(cds,
           color_cells_by = "pseudotime",
           label_cell_groups=FALSE,
           label_leaves=TRUE,
           label_branch_points=TRUE,
           graph_label_size=1.5,
           group_label_size=4,cell_size=1.5)

velocyto.R

#loom 文件生成
cellranger_gtf=$HOME/reference/scgenome.gtf
rmsk_gtf=$HOME/reference/Sus_repeat_rmsk.gtf  ## UCSC 版本确定
cellranger_outDir=/ifs/home/yusenwei/data/project/N1
velocyto run10x -m $rmsk_gtf  $cellranger_outDir $cellranger_gtf
R
library(velocyto.R)
library(Seurat)
library(SeuratWrappers)
scRNA.T <- readRDS("./scRNA.T.rds")
#载入 loom文件
N1= read.loom.matrices("./N1/velocyto/N2.loom")
N2= read.loom.matrices("./N2/velocyto/N2.loom")
N3= read.loom.matrices("./N3/velocyto/N3.loom")
R1= read.loom.matrices("./R1/velocyto/R1.loom")
R2= read.loom.matrices("./R2/velocyto/R2.loom")
R3= read.loom.matrices("./R3/velocyto/R3.loom")
#修改列名
N1$spliced[1:5, 1:5]
N1 <- lapply(N1, function(x) {
  colnames(x) <- sub("x", "-1", sub("N1", "N1_", colnames(x)))
  x
})
N2<- lapply(N2, function(x) {
  colnames(x) <- sub("x", "-1", sub("N2", "N2_", colnames(x)))
  x
})
N3<- lapply(N3, function(x) {
  colnames(x) <- sub("x", "-1", sub("N3", "N3_", colnames(x)))
  x
})
R1 <- lapply(R1, function(x) {
  colnames(x) <- sub("x", "-1", sub("R1", "R1_", colnames(x)))
  x
})
R2 <- lapply(IUGR2, function(x) {
  colnames(x) <- sub("x", "-1", sub("R2", "R2_", colnames(x)))
  x
})
R3 <- lapply(R3, function(x) {
  colnames(x) <- sub("x", "-1", sub("R3", "R3_", colnames(x)))
  x
})
ldat <- list(
  N1=N1,
  N2=N2,
  N3=N3,
  R1=R1,
  R2=R2,
  R3=R3
)
matrix.name <- names(ldat$NBW1)
ldat <- lapply(matrix.name, function(x){
  dat.list <- lapply(ldat, function(y){
    y[[x]]
  })
  dat.merged <- do.call(cbind, dat.list)
  dat.merged
})
names(ldat) <- matrix.name
cells.id <- colnames(scRNA.T)
cells.id[1:5]
ldat <- lapply(ldat, function(x) {
  x[, cells.id]
})
lapply(ldat, dim)
emat <- ldat$spliced
nmat <- ldat$unspliced
cell_cluster <- as.character(scRNA.T@meta.data$celltype1)
names(cell_cluster) <- cells.id
emat <- filter.genes.by.cluster.expression(emat, cell_cluster, min.max.cluster.average = .1)
nmat <- filter.genes.by.cluster.expression(nmat, cell_cluster, min.max.cluster.average = .1)
length(intersect(rownames(emat), rownames(nmat)))

#RNA Velocity分析
### 参数设置
fit.quantile = 0.05 # 官方教程设定为 0.05
deltaT = 1 # default: 1
kCells = 10 # default: 10
### RNA velocity分析
rvel.qf <- gene.relative.velocity.estimates(emat, nmat, deltaT = deltaT, kCells = kCells, fit.quantile = fit.quantile)


### 参数设定
emb = t(reducedDimS(monocle_cds)) # DDRTree坐标 monocle2
emb = Embeddings(scRNA.T, reduction = "umap") # monocle3 seurat
vel = rvel.qf # velocity estimates
n = 100 # 最邻近细胞的数量
scale = "sqrt" # scale方法
# 散点的颜色（用以区分不同的细胞状态）
cell.colors = plyr::mapvalues(cell_cluster, names(colors), colors)
cell.alpha = 0.2 # 散点颜色的透明度
cell.cex = 1 # 散点的尺寸
arrow.scale = 1 # 箭头的长度
arrow.lwd = 1.5 # 箭头的粗细
grid.n = 50 # grids的数量

### plot
pdf(file="velo.pdf")
show.velocity.on.embedding.cor(emb, vel, n, scale=scale, 
                               cell.colors = ac(cell.colors, alpha = cell.alpha),
                               cex = cell.cex, arrow.scale = arrow.scale, 
                               show.grid.flow = TRUE, min.grid.cell.mass = 1,
                               grid.n = grid.n, arrow.lwd = arrow.lwd,cell.border.alpha = 0.1)
dev.off()

scvelo

import anndata
import scvelo as scv
import pandas as pd
import numpy as np
import matplotlib as plt
import scanpy as sc
import os
#载入 loom文件
N1= anndata.read_loom("./N1/velocyto/N2.loom")
N2= anndata.read_loom("./N2/velocyto/N2.loom")
N3= anndata.read_loom("./N3/velocyto/N3.loom")
R1= anndata.read_loom("./R1/velocyto/R1.loom")
R2= anndata.read_loom("./R2/velocyto/R2.loom")
R3= anndata.read_loom("./R3/velocyto/R3.loom")
#修改列名
barcodes = [bc.split(':')[1] for bc in N1.obs.index.tolist()] 
barcodes=[bc[0:len(bc)-1]+'-1' for bc in barcodes]
barcodes=['N1_'+ bc[0:len(bc)] for bc in barcodes]
N1.obs.index = barcodes
N1.var_names_make_unique()
barcodes = [bc.split(':')[1] for bc in N2.obs.index.tolist()] 
barcodes=[bc[0:len(bc)-1]+'-1' for bc in barcodes]
barcodes=['N2_'+ bc[0:len(bc)] for bc in barcodes]
N2.obs.index = barcodes
N2.var_names_make_unique()
barcodes = [bc.split(':')[1] for bc in N3.obs.index.tolist()] 
barcodes=[bc[0:len(bc)-1]+'-1' for bc in barcodes]
barcodes=['N3_'+ bc[0:len(bc)] for bc in barcodes]
N3.obs.index = barcodes
N3.var_names_make_unique()
barcodes = [bc.split(':')[1] for bc in R1.obs.index.tolist()] 
barcodes=[bc[0:len(bc)-1]+'-1' for bc in barcodes]
barcodes=['R1_'+ bc[0:len(bc)] for bc in barcodes]
R1.obs.index = barcodes
R1.var_names_make_unique()
barcodes = [bc.split(':')[1] for bc in R2.obs.index.tolist()] 
barcodes=[bc[0:len(bc)-1]+'-1' for bc in barcodes]
barcodes=['R2_'+ bc[0:len(bc)] for bc in barcodes]
R2.obs.index = barcodes
R2.var_names_make_unique()
barcodes = [bc.split(':')[1] for bc in R3.obs.index.tolist()] 
barcodes=[bc[0:len(bc)-1]+'-1' for bc in barcodes]
barcodes=['R3_'+ bc[0:len(bc)] for bc in barcodes]
R3.obs.index = barcodes
R3.var_names_make_unique()
ldata= N1.concatenate([N2,N3,R1,R2,R3])
adata=sc.read_h5ad('./data/project1/scRNA.T.h5ad')
adata= scv.utils.merge(adata,ldata)
scv.pp.filter_and_normalize(adata,min_shared_counts=20,n_top_genes=2000)
scv.pp.moments(adata,n_pcs=30,n_neighbors=30)
scv.tl.recover_dynamics(adata)
scv.tl.velocity(adata,mode='dynamical')
scv.tl.velocity_graph(adata)
scv.pl.velocity_embedding_stream(adata,basis='umap',color='celltype', title='', smooth=.8, min_mass=4)

朴素的科研打工仔

专注于文献的分享，浙大研究生学习生活的记录。

最新文章

Compass| 单细胞代谢通量预测下游分析

细胞代谢|元细胞构建+代谢通量分析

相关性|空转+空代区域GSVA相关性分析

引爆学术界“ 核弹 ”寒门学子凭借这篇Nature一跃成为特级教授！独占鳌头！

细胞代谢|元细胞构建+代谢通量分析

共表达网络| WGCNA与hdWGCNA实操

细胞比例|一文打通单细胞转录组细胞类型丰度变化分析

数据整合|单细胞转录组多样本/不同测序手段整合

免费免费~影像组学论文拆解训练营火热开展中 | SCI制造机--影像组学+病理组学+深度学习人工智能培训班

Adv. Sci.丨宫内生长受限新生儿对肝损伤的性二型反应（解读）

空间转录组细胞信号流和轨迹推断-COMMOT/SPATA

空间转录组细胞通讯-stLearn（cell2location/RCTD）

单细胞和空间转录组联合分析-Cell2location+MIA

单细胞和空间转录组联合分析-RCTD去卷积

文献合集(五)｜单细胞如何做？做到什么程度？

文献合集(四)｜单细胞如何做？做到什么程度？

文献合集(三)｜单细胞如何做？做到什么程度？

文献合集(二)｜单细胞如何做？做到什么程度？

文献合集(一)｜单细胞技术如何应用？

拟时序| CytoTRACE +slingshot呈现

拟时序| monocle3+velo.R+scvelo呈现

文献合集(二)｜单细胞如何做？做到什么程度？

文献合集(一)｜单细胞技术如何应用？

2022 IF 正式发布

拟时序| monocle3+velo.R+scvelo呈现

WB| 条带变形计

共表达网络| WGCNA与hdWGCNA实操

这几个生信分析常用工具你真的学通了吗？单细胞，多组学or基因都能分析到底！

人工智能基因组学，Chip-Seq、RNA-Seq中预测premiRNA，您想知道的都在这！

单细胞/空间转录组分析常用工具汇总

新手入学ScRNA-seq分析流程

空间转录组|Seurat包初探

表观遗传学|Chip-seq、ATAC-seq分析流程

linux|合并文件命令

表观转录组|m6A-seq分析流程

整理| 常见注释数据库介绍

Shiny|自测单细胞数据供人访问

转录组|DESeq2差异基因分析-小趣事

空间转录组|Seurat包初探

分类

时事

民生

政务

教育

文化

科技

财富

体娱

健康

情感

旅行

百科

职场

楼市

企业

乐活

学术

汽车

时尚

创业

美食

幽默

美体

文摘

原创标签

时事社会财经军事教育体育科技汽车科学房产搞笑综艺明星音乐动漫游戏时尚健康旅游美食生活摄影宠物职场育儿情感小说曲艺文化历史三农文学娱乐电影视频图片新闻宗教电视剧纪录片广告创意壁纸头像心灵鸡汤星座命理教育培训艺术文化金融财经健康医疗美妆时尚餐饮美食母婴育儿社会新闻工业农业时事政治星座占卜幽默笑话独立短篇连载作品文化历史科技互联网

发布位置

广东北京山东江苏河南浙江山西福建河北上海四川陕西湖南安徽湖北内蒙古江西云南广西甘肃辽宁黑龙江贵州新疆重庆吉林天津海南青海宁夏西藏香港澳门台湾美国加拿大澳大利亚日本新加坡英国西班牙新西兰韩国泰国法国德国意大利缅甸菲律宾马来西亚越南荷兰柬埔寨俄罗斯巴西智利卢森堡芬兰瑞典比利时瑞士土耳其斐济挪威朝鲜尼日利亚阿根廷匈牙利爱尔兰印度老挝葡萄牙乌克兰印度尼西亚哈萨克斯坦塔吉克斯坦希腊南非蒙古奥地利肯尼亚加纳丹麦津巴布韦埃及坦桑尼亚捷克阿联酋安哥拉