Spatial proteogenomics reveals distinct and
evolutionarily conserved hepatic macrophage
niches,Cell,2022 Single-Cell, Single-Nucleus, and
Spatial RNA Sequencing of the Human
Liver Identifies Cholangiocyte and
Mesenchymal Heterogeneity,Hepatology communications,2021 数据读取-空转 library(Seurat) library(ggplot2) library(stringr) library(harmony) library(patchwork) library(dplyr) #读取数据 空间转录组部分 ## 批量读取数据 assays <- dir("rawdata_st/" ) dir <- paste0("rawdata_st/" , assays) samples_name = c('D1' ,'D2' ,'H1' ,'D3' ,'H2' ) spatial.list = list()for (i in 1 :length(dir)){ spatial <- Load10X_Spatial(data.dir = dir[i],slice = samples_name[i]) spatial$orig.ident <- samples_name[i] spatial$group <- stringr::str_extract_all(samples_name[i],"[aA-zZ]+" ) spatial <- RenameCells(spatial, add.cell.id = samples_name[i]) spatial [["percent.mt" ]] <- PercentageFeatureSet(spatial, pattern = "^MT[-]" ) spatial.list[[samples_name[i]]] <- spatial } p_mt.list <- lapply(spatial.list, function(x){ p1 <- VlnPlot(x, features = "percent.mt" ) + ggtitle(unique(x$orig.ident)) + theme(legend.position = "none" , axis.text.x = element_blank()) p2 <- SpatialFeaturePlot(x, features = "percent.mt" ) + theme(legend.position = "right" ) p <- p1|p2 p }) wrap_plots(p_mt.list, ncol = 2 ) # 整合样本 stRNA <- merge(spatial.list[[1 ]], spatial.list[2 :length(spatial.list)]) # 标准化 stRNA <- SCTransform(stRNA, assay = "Spatial" , verbose = T) ## 降维聚类 stRNA <- RunPCA(stRNA, assay = "SCT" , verbose = T) ElbowPlot(stRNA, ndims=30 , reduction="pca" ) stRNA = RunHarmony(stRNA, group.by.vars = "orig.ident" ,assay.use = "SCT" ) stRNA <- RunUMAP(stRNA, reduction = "harmony" , dims = 1 :20 ) stRNA <- FindNeighbors(stRNA, reduction = "harmony" , dims = 1 :20 ) stRNA <- FindClusters(stRNA, verbose = T,resolution = 0.5 ) ## 可视化 p1 <- DimPlot(stRNA, reduction = "umap" , label = TRUE,group.by = 'SCT_snn_res.0.5' ,pt.size = 1 ) p1 p2 <- SpatialDimPlot(stRNA,label = T) p2 p1/p2
数据读取-单细胞 #读取数据 单细胞部分 ## 批量读取数据 setwd("./rawdata_sc/" ) fs=list.files(pattern = '.h5' ) samples_name = c('Ob1' ,'Ob2' ,'Ob3' ,'Ob4' ,'Ob5' ,'Ob6' ,'Le1' ,"Le2" ) scRNA.list <- list()for (i in 1 :length(fs)){ counts <- Read10X_h5(fs[i]) sample=str_split(fs[i],'_' ,simplify = T)[,1 ] scRNA <- CreateSeuratObject(counts, project=samples_name[i], min.cells=3 , min.features = 200 ) scRNA <- RenameCells(scRNA, add.cell.id = samples_name[i]) scRNA$group <- stringr::str_extract_all(samples_name[i],"[aA-zZ]+" ) scRNA.list[[samples_name[i]]] <- scRNA } scRNA <- merge(scRNA.list[[1 ]], scRNA.list[2 :length(scRNA.list)]) # 计算细胞中线粒体基因比例 scRNA[["percent.mt" ]] <- PercentageFeatureSet(scRNA, pattern = "^MT-" ) # 计算细胞中核糖体基因比例 scRNA[["percent.rb" ]] <- PercentageFeatureSet(scRNA, pattern = "^RP[LS]" ) # 质控 scRNA <- subset(scRNA, nCount_RNA<8000 &nFeature_RNA>300 &percent.mt<25 ) # 标准化 scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize" , scale.factor = 10000 ) # 鉴定高变基因 scRNA <- FindVariableFeatures(scRNA, selection.method = "vst" , nfeatures = 3000 ) # 尺度变换 scRNA <- ScaleData(scRNA, features = rownames(scRNA)) # 降维 scRNA <- RunPCA(scRNA, features = VariableFeatures(object = scRNA)) scRNA <- RunHarmony(scRNA, group.by.vars="orig.ident" , max.iter.harmony = 20 ) scRNA <- RunUMAP(scRNA,reduction = "harmony" , dims = 1 :30 , verbose = FALSE) ## 确定分辨率 scRNA <- FindNeighbors(scRNA, reduction = "harmony" ,dims = 1 :30 , verbose = FALSE)for (res in c (0.01 , 0.05 , 0.1 , 0.2 , 0.3 , 0.5 ,0.8 ,1 ) ) { print(res) scRNA <- FindClusters(scRNA, graph.name = "RNA_snn" , resolution = res, algorithm = 1 ) } colors <- c('#e41a1c' ,'#377eb8' ,'#4daf4a' ,'#984ea3' ,'#ff7f00' ,'#ffff33' ,'#a65628' ,'#f781bf' ) names(colors) <- unique(colnames(scRNA@meta .data)[grep("RNA_snn_res" , colnames(scRNA@meta .data))]) library(clustree) clustree(scRNA@meta .data, prefix = "RNA_snn_res." ) Idents(scRNA) <- "RNA_snn_res.0.3" DimPlot(scRNA,label = T) # 细胞类型鉴定 DefaultAssay(scRNA) <- "RNA" library(COSG) marker_cosg <- cosg(scRNA,groups='all' ,assay='RNA' ,slot='data' ,mu=1 ,n_genes_user=100 )new .cluster.ids <- c("Hep1" ,"Hep2" , "Hep3" ,"Hep4" , "LSEC" ,"Kup" ,"HSC" ,"NKT" ,"Cho" , "UN1" , "CV_EC" , "UN2" ,"Plasma" ) names(new .cluster.ids) <- levels(scRNA) scRNA <- RenameIdents(scRNA, new .cluster.ids) table(scRNA@active .ident) scRNA$celltype <- scRNA@active .ident # 可视化 Dimplot(scRNA,label=T)
去卷积-RCTD # RCTD ## 单细胞参考数据库构建 # 整体 library(spacexr) library(tidyverse) sc_counts <- as.matrix(scRNA[['RNA' ]]@counts ) sc_nUMI = colSums(sc_counts) cellType=data.frame(barcode=colnames(scRNA), celltype=scRNA$celltype) names(cellType) = c('barcode' , 'cell_type' ) cell_types = cellType$cell_type; names(cell_types) <- cellType$barcode cell_types =as.factor(cell_types) cell_types reference = Reference(sc_counts, cell_types, sc_nUMI) # 拆分疾病和正常数据库 scRNA_Ob=subset(scRNA,group == 'Ob' ) sc_counts <- as.matrix(scRNA_Ob[['RNA' ]]@counts ) sc_nUMI = colSums(sc_counts) cellType=data.frame(barcode=colnames(scRNA_Ob), celltype=scRNA_Ob$celltype) names(cellType) = c('barcode' , 'cell_type' ) cell_types = cellType$cell_type; names(cell_types) <- cellType$barcode cell_types =as.factor(cell_types) cell_types reference_D = Reference(sc_counts, cell_types, sc_nUMI) scRNA_Le=subset(scRNA,group == 'Le' ) scRNA_Le=subset(scRNA_Le,celltype != 'UN1' ) scRNA_Le$celltype=factor(scRNA_Le$celltype) sc_counts <- as.matrix(scRNA_Le[['RNA' ]]@counts ) sc_nUMI = colSums(sc_counts) cellType=data.frame(barcode=colnames(scRNA_Le), celltype=scRNA_Le$celltype) names(cellType) = c('barcode' , 'cell_type' ) cell_types = cellType$cell_type; names(cell_types) <- cellType$barcode cell_types =as.factor(cell_types) reference_H = Reference(sc_counts, cell_types, sc_nUMI)
# 拆分空间样本一一去卷积 --对应整体单细胞数据库 sto.list <- SplitObject(stRNA, split.by = "orig.ident" ) slice <- names(sto.list) result.RCTD=data.frame()for (i in slice){ coords <- stRNA@images [[i]]@coordinates [,c(4 ,5 )] colnames(coords) <- c("x" ,"y" ) query<- SpatialRNA(coords = coords, counts = sto.list[[i]]@assays $Spatial@counts , nUMI = structure(sto.list[[i]]$nCount_Spatial, names=colnames(sto.list[[i]]))) RCTD <- create.RCTD(spatialRNA = query, reference = reference, max_cores = 10 ) RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet' ) result_df <- RCTD@results $results_df result_df <- as.data.frame(result_df) result.RCTD=result.RCTD%>%rbind(result_df) } stRNA1=stRNA[,rownames(result.RCTD)] stRNA1 <- AddMetaData(stRNA1,metadata = result.RCTD) Idents(stRNA1)=stRNA1$second_type p1<-SpatialDimPlot(stRNA1) # 拆分空间样本一一去卷积 --对应疾病和正常 result.RCTD_group=data.frame()for (i in slice){ if (str_extract_all(i,"[aA-zZ]+" )=='H' ){ coords <- stRNA@images [[i]]@coordinates [,c(4 ,5 )] colnames(coords) <- c("x" ,"y" ) query<- SpatialRNA(coords = coords, counts = sto.list[[i]]@assays $Spatial@counts , nUMI = structure(sto.list[[i]]$nCount_Spatial, names=colnames(sto.list[[i]]))) RCTD <- create.RCTD(spatialRNA = query, reference = reference_H, max_cores = 10 ) RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet' ) result_df <- RCTD@results $results_df result_df <- as.data.frame(result_df) } else { coords <- stRNA@images [[i]]@coordinates [,c(4 ,5 )] colnames(coords) <- c("x" ,"y" ) query<- SpatialRNA(coords = coords, counts = sto.list[[i]]@assays $Spatial@counts , nUMI = structure(sto.list[[i]]$nCount_Spatial, names=colnames(sto.list[[i]]))) RCTD <- create.RCTD(spatialRNA = query, reference = reference_D, max_cores = 10 ) RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet' ) result_df <- RCTD@results $results_df result_df <- as.data.frame(result_df) } result.RCTD_group=result.RCTD_group%>%rbind(result_df) } stRNA2=stRNA[,rownames(result.RCTD_group)] stRNA2 <- AddMetaData(stRNA2,metadata = result.RCTD_group) Idents(stRNA2)=stRNA2$second_type p2<-SpatialDimPlot(stRNA2)
区分正常和疾病组的数据库进行去卷积效果会优于整个数据库 基因集评分 ### 读取基因列表 geneset=read.table('../fatty.txt' ,header = T,check.names = F,sep = '\t' ) geneset=list(geneset$GT) names(geneset)='GTs' df_forGMT <- data.frame( geneset=c("geneset" ,geneset$GTs) ) write.table(t(df_forGMT),"geneset.gmt" ,quote=F,sep="\t" ,row.names=T,col.names=F) ## scMetabolsimlibrary (scMetabolism) library (rsvd) scRNA<-sc.metabolism.Seurat (obj = scRNA, method = "VISION" , imputation = F, ncores = 1 , metabolism.type = "KEGG" ) DimPlot.metabolism (obj = scRNA, pathway = "geneset" , dimention.reduction.type = "umap" , dimention.reduction.run = F, size = 1 ) vision <- as.data.frame (scRNA@assays$METABOLISM$score) vision =as.data.frame(t(vision)) ## AUCelllibrary (AUCell) cells_rankings <- AUCell_buildRankings (scRNA@assays$RNA@data, nCores=1 , plotStats=TRUE) cells_AUC <- AUCell_calcAUC (geneset, cells_rankings,nCores =1 , aucMaxRank=nrow(cells_rankings) *0.1) aucs <- as.numeric (getAUC(cells_AUC) ['GTs', ]) ## Ucells & singscore & ssgsealibrary (irGSEA) scRNA <- irGSEA.score (object = scRNA, assay = "RNA" , slot = "data" , seeds = 123 , ncores = 1 ,msigdb = F, custom = T, geneset = geneset, method = c('UCell' ,'singscore' ,'ssgsea' ) , kcdf = 'Gaussian' ) uc=as.data.frame(scRNA@assays $UCell@counts ) uc=as.data.frame(t(uc)) singscore=as.data.frame(scRNA@assays $singscore@counts ) singscore=as.data.frame(t(singscore)) ssgsea=as.data.frame(scRNA@assays $ssgsea@counts ) ssgsea=as.data.frame(t(ssgsea)) ## addmodulescore scRNA=AddModuleScore(scRNA,features = geneset,name = 'Add' ) # 组合 score=data.frame(AUCell=aucs,UCell=uc$GTs,singscore=singscore$GTs,ssgsea=ssgsea$GTs,Add=scRNA$Add1,vision=vision$geneset) # scale 标准化 score<- scale(score) # 0 -1 标准化 normalize=function(x){ return ((x-min(x))/(max(x)-min(x)))} score=apply(score, 2 , normalize) score=as.data.frame(score) score$Scoring=rowSums(score) scRNA$Add1=NULL scRNA@meta .data=cbind(scRNA@meta .data,score) # 可视化 DotPlot(scRNA,features = colnames(score)) + RotatedAxis() + theme(axis.text.x = element_text(angle = 45 , face="italic" , hjust=1 ), axis.text.y = element_text(face="bold" )) + theme(legend.position="right" ) + labs(title = "cluster markers" , y = "" , x="" )
多种基因集评分手段比较 # 关注细胞类型 scRNA_Hep <- subset(scRNA,celltype=="Hep3" ) scRNA_Hep$FAT_group=ifelse(scRNA_Hep$Scoring > 2.5 ,'FAT_highHEP' ,'FAT_lowHEP' ) # 进行空间定位 Idents(scRNA_Hep) <- "FAT_group" sc_counts <- as.matrix(scRNA_Hep[['RNA' ]]@counts ) sc_nUMI = colSums(sc_counts) cellType=data.frame(barcode=colnames(scRNA_Hep), celltype=scRNA_Hep$FAT_group) names(cellType) = c('barcode' , 'cell_type' ) cell_types = cellType$cell_type; names(cell_types) <- cellType$barcode cell_types =as.factor(cell_types) reference_FAT = Reference(sc_counts, cell_types, sc_nUMI) result.RCTD=data.frame()for (i in slice){ coords <- stRNA@images [[i]]@coordinates [,c(4 ,5 )] colnames(coords) <- c("x" ,"y" ) query<- SpatialRNA(coords = coords, counts = sto.list[[i]]@assays $Spatial@counts , nUMI = structure(sto.list[[i]]$nCount_Spatial, names=colnames(sto.list[[i]]))) RCTD <- create.RCTD(spatialRNA = query, reference = reference, max_cores = 10 ) RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet' ) result_df <- RCTD@results $results_df result_df <- as.data.frame(result_df) result.RCTD=result.RCTD%>%rbind(result_df) } #stRNA_FAT=stRNA[,rownames(result.RCTD)] #stRNA_FAT <- AddMetaData(stRNA_FAT,metadata = result.RCTD) #Idents(stRNA)=stRNA$second_type #DimPlot(stRNA) #SpatialDimPlot(stRNA)
# 可能是因为样本来源不同的原因,单细胞的发现无法在空间中定位
stRNA=AddModuleScore(stRNA,features = geneset,name = 'Add' )
SpatialFeaturePlot(stRNA, features = "Add1" , pt.size.factor = 1 , crop = FALSE, alpha = c(0.1 ,1 ), min.cutoff = 0 , max.cutoff = 1 )