空间转录组|Seurat包初探

文摘科学 2023-03-18 10:00 江苏

写在前面的话

目前通过单细胞转录谱重建异质性，有效提高了我们对组织异质性的理解。新兴的空间成像组学的迅速发展有助于我们对组织功能和细胞间相互作用原生环境的理解。这正是现在空转能发高分文章的主要原因，空间维度的信息对于生命科学研究的重要性。

跟随时代潮流的脚步学起来，数据处理起来其实和单细胞转录组有许多类似之处，所以上手还是很快的。有意思的是，主流分析软件python越来越占据主导了，说明大数据高通量时代精通python的重要性。

以10x visium官方的小鼠脑切片数据为例，Seurat分析流程记录......

准备工作

#安装相关R包
install.packages("Seurat")
install.packages("ggplot2")
install.packages("dplyr")
install.packages("hdf5r")
install.packages("future")
install.packages("patchwork")
#加载相关R包
library(Seurat)
library(dplyr)
library(ggplot2)
library(hdf5r)
library(patchwork)
#转换工作目录
getwd()
setwd()

读取数据

#读取10X ST数据(hdf5)
anterior1 <- Load10X_Spatial(data.dir = "./Anterior",slice = "anterior")
posterior1 <- Load10X_Spatial(data.dir = "./posterior/",slice = "posterior")
dim(anterior1)
dim(posterior1)
#读取ST的三元组文件
read10X_Spatial <- function (data.dir="",image.dir="",assay = "Spatial", 
                             slice = "slice1",project="CreateSeuratObject") 
  {data <- Read10X(data.dir = data.dir)
  object <- CreateSeuratObject(counts = data, assay = assay,project = project)
  image <- Read10X_Image(image.dir = image.dir)
  image <- image[Cells(x = object)]
  DefaultAssay(object = image) <- assay
  object[[slice]] <- image
  return(object)}

anterior <- read10X_Spatial(data.dir = "./anterior/filtered_feature_bc_matrix/",image.dir = "./anterior/spatial/",slice = "anterior",project = "anterior")
posterior <- read10X_Spatial(data.dir = "posterior/filtered_feature_bc_matrix/",image.dir = "./posterior/spatial/",slice = "posterior",project = "posterior")
#合并样品
st <- merge(anterior,posterior,add.cell.ids = c("ante","post"))
#多样品合并
#st <- merge(a,y=c(b,c,d),add.cell.ids = c("a","b","c","d"),project = "four_merged")

数据质控

# 计算细胞中线粒体基因比例
st[["percent.mt"]] <- PercentageFeatureSet(st, pattern = "^mt-")
# 计算细胞中核糖体基因比例
st[["percent.rb"]] <- PercentageFeatureSet(st, pattern = "^Rp[ls]")
#分子数
p1 <- VlnPlot(st, features = "nCount_Spatial") +theme(legend.position = "none")
p2 <- SpatialFeaturePlot(st, features = "nCount_Spatial") + theme(legend.position = "right")
p1|p2
# 基因数
p1 <- VlnPlot(st, features = "nFeature_Spatial") +theme(legend.position = "none")
p2 <- SpatialFeaturePlot(st, features = "nFeature_Spatial") + theme(legend.position = "right")
p1|p2
# 线粒体
p1 <- VlnPlot(st, features = "percent.mt") +theme(legend.position = "none")
p2 <- SpatialFeaturePlot(st, features = "percent.mt") + theme(legend.position = "right")
p1|p2
# 核糖体
p1 <- VlnPlot(st, features = "percent.rb") +theme(legend.position = "none")
p2 <- SpatialFeaturePlot(st, features = "percent.rb") + theme(legend.position = "right")
p1|p2
# 质控
minCount = 1500
minFeature = 500
maxmt = 15
st <- subset(st, nCount_Spatial>minCount&nFeature_Spatial>minFeature&percent.mt<maxmt)
dim(st)

数据前处理

#对数据进行归一化处理
#方法一：log均一化
st <- NormalizeData(st,normalization = "LogNormalize")
st <- FindVariableFeatures(st,selection.method = "vst",verbose = F)
st <- ScaleData(st,features = rownames(st))
#线性降维（PCA）,默认用高变基因集，但也可通过 features 参数自己指定
#同时也可对主成分数量进行定义 
st <- RunPCA(st,npcs = 50)
#确定数据集的分群个数 
#Jackstraw 置换检验算法；重复取样（原数据的 1%），重跑PCA,鉴定p-value较小的PC；计算‘null distribution’(即零假设成立时)时的基因 scores;
#ST数据背景噪音较小，可以检验更多的主成分以提高分群准确性
st <- JackStraw(st,dims = 50) #7min
st <- ScoreJackStraw(st,dims = 1:50)
JackStrawPlot(st,dims = 1:50)
ElbowPlot(st,ndims = 50) #碎石图

#方法二：SCTransform方法
#SCTranform的归一化方式无法执行Jackstraw置换检验
st <- SCTransform(st,assay = "Spatial")  #3min
st <- RunPCA(st)
#基于PCA对Spot进行聚类
st <- FindNeighbors(st,dims = 1:20)
st <- FindClusters(st,resolution = 0.4)
#使用组织切片图片来呈现分群结果
p1 <- SpatialDimPlot(st,label = T,label.size = 3,cols = col1)
p1

#除了组织切片，数据也可以使用tSNE非线性降维进行可视化
st <- RunTSNE(st,dims = 1:30,label=T)
p2 <- DimPlot(st,reduction = "tsne",pt.size = 1.5,label = T)
p2
st <- RunUMAP(st, dims = 1:30, label = T)
p3 <- DimPlot(st, reduction = "umap",pt.size = 1.5,label = T)
p3
p2+p3
ggsave("tsne_umap1.pdf",p2+p3,width = 30,height = 18,units = "cm")
#data.tsne <- st@reductions[["tsne"]]@cell.embeddings    提取tsne或umap图的坐标信息
#data.cluster <- st[["seurat_clusters"]]
#write.csv(data.tsne,"data.tsne.csv",row.names = T,quote = T)
#write.csv(data.cluster,"data.cluster.csv",row.names = T,quote = T)

#亚群和样本的细胞分布
#a <- c(rep(0,ncol(st)))
#for (i in 1:ncol(st)) {
#  a[i] <- strsplit(colnames(st),"_")[[i]][1]}
#st@meta.data[["orig.ident"]] <- a   修改样本分组信息
DimPlot(st,reduction = "umap",group.by = c("seurat_clusters","orig.ident"),pt.size = 1.2)
DimPlot(st,reduction = "tsne",group.by = c("seurat_clusters","orig.ident"),pt.size = 1.2)

#可以使用交互界面同时呈现两种降维方式，以完成对应性查看
LinkedDimPlot(st,reduction = "umap",image = "anterior")
LinkedDimPlot(st,reduction = "tsne",image = "posterior")
LinkedFeaturePlot(st, feature = "Stx1a",image = "anterior")
LinkedFeaturePlot(st, feature = "Car8",image = "posterior",reduction = "tsne")

#高亮cluster 1的spot
SpatialDimPlot(st,cells.highlight = CellsByIdentities(st,idents = "1"))
#如果要高亮多个cluster，需要将facet.highlight参数改为TRUE，否则会报错
SpatialDimPlot(st,cells.highlight = CellsByIdentities(st,idents = c("0","4","16")),images = "anterior",facet.highlight = T)
SpatialDimPlot(st,cells.highlight = CellsByIdentities(st,idents = c("0","4","16")),images = "posterior",facet.highlight = T)

#去HE染色的组织切片映射图
imagerow <- anterior@images[["anterior"]]@coordinates[["imagerow"]]
imagecol <- anterior@images[["anterior"]]@coordinates[["imagecol"]]
coord <- as.matrix(data.frame(row.names = colnames(anterior),image_1=imagerow,image_2=imagecol))
ante_coord=CreateDimReducObject(embeddings = coord,key = "image",assay = "Spatial")
anterior@reductions[["ante"]]=ante_coord
color1<-colorRampPalette(c("#4476B6", "#FFFEC2", "#FD9B4A"))(50)
FeaturePlot(anterior,reduction = "ante",features = "Stx1a")+scale_x_reverse(limits=c(11000,1000))+coord_flip()+scale_fill_manual(values = color1)

#亚群分群切片图
anterior@meta.data[["seurat_clusters"]]=st$seurat_clusters[1:2695]
p4<-DimPlot(anterior,reduction = "ante",group.by = "seurat_clusters",pt.size = 1.5)+scale_x_reverse(limits=c(11000,1000))+coord_flip()
mytheme <- theme(axis.line = element_blank(),axis.text = element_blank(),
                 axis.title = element_blank(),axis.ticks = element_blank(),
                 plot.background = element_rect(colour = "black"))
p4+mytheme

差异基因分析

DefaultAssay(st) <- "Spatial"
dif<-FindAllMarkers(st,assay = "Spatial",logfc.threshold = 0.6,min.pct = 0.4,only.pos = T)
write.table(dif,"dif.txt",sep = "\t",quote = F,row.names = T,col.names = T)
#dif1 <- read.table("dif.txt",header = T,row.names = 1,sep = "\t")
sig.dif<-dif%>%group_by(cluster)%>%top_n(n = 5,wt = avg_log2FC)
write.table(sig.dif,"dif.sig.txt",sep = "\t",quote = F,row.names = T,col.names = T)
genes <- unique(sig.dif$gene)
length(genes)
#图形可视化
DotPlot(st,features =genes[26:45]) +theme(axis.text.x = element_text(angle = 45,hjust = 1))#气泡图
FeaturePlot(st, features = genes[16:27],ncol = 3) #基因表达umap映射图
VlnPlot(st, features = genes[75:79],pt.size=0) #小提琴图
DoHeatmap(st,features = genes,angle=45)  #热图
SpatialFeaturePlot(st,features = genes[34],pt.size=1.5,alpha=c(0.1,1),interactive = F) #组织映射图
SpatialPlot(st,features = genes[34],pt.size=1.5,alpha=c(0.1,1),images = "posterior",interactive = T) #组织映射图
#两个亚群之间的差异基因分析
group.cluster.dif<-FindMarkers(st,assay = "Spatial",ident.1 = "3",ident.2 = "15",
                       logfc.threshold = 0.6,min.pct = 0.4)
group.cluster.sig.dif<-group.cluster.dif%>%top_n(n = 10,wt = abs(avg_log2FC))
group.cluster.sig.dif1<-rbind(group.cluster.dif%>%top_n(n = 10,wt = avg_log2FC),
                              group.cluster.dif%>%top_n(n = -10,wt = avg_log2FC))
#不同样本之间的亚群差异基因分析
st[["group.cluster"]]<-paste(st$orig.ident,Idents(st),sep = '_')
group.sample.dif <- FindMarkers(st,assay = "Spatial",group.by = "group.cluster",
                         ident.1 = "anterior_1",ident.2 = "posterior_1",
                         logfc.threshold = 0.6,min.pct = 0.4)
group.sample.sig.dif<-group.sample.dif%>%top_n(n = 10,wt = abs(avg_log2FC))
group.sample.sig.dif1<-rbind(group.sample.dif%>%top_n(n = 10,wt = avg_log2FC),
                             group.sample.dif%>%top_n(n = -10,wt = avg_log2FC))

局部区域选取

#情况一：提取亚群区域
C1_C3 <- subset(st,idents = c("1","3"))
SpatialDimPlot(C1_C3, crop = FALSE, label = TRUE, pt.size.factor = 1, label.size = 3)

#情况二：提取矩形区域
#将图形坐标提取到meat.data中，方便局部区域筛选
anterior[["ant_imagerow"]] <- anterior@images[["anterior"]]@coordinates[["imagerow"]]
anterior[["ant_imagecol"]] <- anterior@images[["anterior"]]@coordinates[["imagecol"]]

#提取方法
cortex <- subset(anterior,ant_imagerow > 5000 & ant_imagecol < 9000)
cortex <- subset(cortex,ant_imagerow < 9000 & ant_imagecol > 5000)
#从全局查看提取结果
SpatialDimPlot(cortex, crop = FALSE, label = TRUE, pt.size.factor = 1, label.size = 3)

cortex <- NormalizeData(cortex,normalization = "LogNormalize")
cortex <- FindVariableFeatures(cortex,selection.method = "vst",verbose = F)
cortex <- ScaleData(cortex,features = rownames(cortex))
cortex <- RunPCA(cortex,npcs = 50)
ElbowPlot(cortex)
#cortex <- SCTransform(cortex,assay = "Spatial")  #3min
#cortex <- RunPCA(cortex)

#基于PCA对Spot进行聚类
cortex <- FindNeighbors(cortex,dims = 1:10)
cortex <- FindClusters(cortex,resolution = 0.4)

#查看局部区域的分群结果
p1 <- SpatialDimPlot(cortex, crop = TRUE, label = TRUE)
p2 <- SpatialDimPlot(cortex, crop = FALSE, label = TRUE, pt.size.factor = 1, label.size = 3)
p1+p2
#同样可以用tsne和umap展示局部分群聚类结果

#提取局部区域
cortex_C1_C2 <- subset(cortex,idents = c("0","1"))
SpatialDimPlot(cortex_C1_C2, crop = FALSE, label = TRUE, pt.size.factor = 1, label.size = 3)

分群注释（解剖学图谱-区域注释）或（Mark基因注释或利用单细胞转录组数据反卷积）

# 单细胞数据
brain <- readRDS("brain.rds")
brain <- SCTransform(brain,ncells=3000,verbose=FALSE) %>% RunPCA(verbose=F) %>% RunUMAP(dims=1:30)
DimPlot(brain,group.by="subclass",label=T)

# 锚点转换
anchors <- FindTransferAnchors(reference = brain, query = st, 
                               normalization.method = "SCT")
predictions.assay <- TransferData(anchorset = anchors, 
                                  refdata = brain$subclass, 
                                  prediction.assay = TRUE,
                                  weight.reduction = st[["pca"]], 
                                  dims = 1:30)
st[["predictions"]] <- predictions.assay
# 结果展示
DefaultAssay(st) <- "predictions"
SpatialFeaturePlot(st, features = "Macrophage", 
                  pt.size.factor = 1.6, ncol = 3, crop = TRUE)

根据解剖图谱进行注释

主流工具

功能富集、拟时序、细胞通讯等主流分析工具单细胞转录组分析使用的工具，空间转录组数据也同样适用。

一些数据库

http://atlas.brain-map.org/ 脑解剖学图谱
https://www.10xgenomics.com/resources/datasets 10x官网数据
http://egastrulation.sibcb.ac.cn/ 早期胚胎
https://www.spatialomics.org/SpatialDB 
https://db.cngb.org/stomics/datasets/
https://www.spatialresearch.org/
https://nanostring.com/products/geomx-digital-spatial-profiler/spatial-organ-atlas/

朴素的科研打工仔

专注于文献的分享，浙大研究生学习生活的记录。