偷偷问一下,关注了吗?
内容获取
1、购买打包合集(《KS科研分享与服务》付费内容打包集合),价格感人,可以加入微信VIP群(答疑交流群,甚至有小伙伴觉得群比代码更好),可以获取建号以来所有内容,群成员专享视频教程,提前更新,其他更多福利!
2、《KS科研分享与服务》公众号有QQ群,进入门槛是20元(完全是为了防止白嫖党,请理解),请考虑清楚。群里有免费推文的注释代码和示例数据(终身拥有),没有付费内容,群成员福利是购买单个付费内容半价!
需要者详情请联系作者(非需要者勿扰,处理太费时间):
我们介绍的是scATAC联合scRNA数据分析,那么scRNA必不可少,我们选择的数据相同的分组也测了scRNA,所以还是回顾一下,跑一下scRNA流程,做好细胞分群和注释吧,一方面注释好的scRNA可用于scATAC数据注释,另一方面两者可以联合分析做一些内容。话不多说,都很熟悉,直接上代码:上游分析不再多说!
setwd('./Documents/KS_TS/signac_scATAC/scRNA_for_inter/')
#####################################
# load packages
#####################################
library(dplyr)
library(Seurat)
library(patchwork)
library(stringr)
library(ggplot2)
#####################################
# process data
#####################################
#批量读入
data_dirs <- list.dirs(path='.', full.names=TRUE, recursive=FALSE)
samples <- c("AA","HC","SD")
objs <- list()
for(ix in seq_along(data_dirs)){
sample <- samples[ix]
path <- data_dirs[ix]
data <- Read10X(data.dir=path)
obj <- CreateSeuratObject(counts=data, project=sample, min.cells=3, min.features=200) # orig.ident not getting assigned correctly?
obj$orig.ident <- sample
objs[[sample]] <- obj
}
#线粒体基因比例
for(i in seq_along(objs)){
objs[[i]][["percent.mt"]] <- PercentageFeatureSet(objs[[i]], pattern = "^MT-")
}
#filter cells
for(i in seq_along(objs)){
objs[[i]] <- subset(objs[[i]],
subset = (
nFeature_RNA > 200 &
nFeature_RNA < Inf &
nCount_RNA > 1000 &
nCount_RNA < Inf &
percent.mt < 20
))
}
#run cluster for each sample
seuratPreProcess <- function(obj, selectionMethod="vst", nFeatures=2000, dims=1:10, res=0.5, seed=1){
# perform standard Seurat preprocessing
# (Normalization, Scaling, Dimensionality Reduction, Clustering)
obj <- NormalizeData(obj)
obj <- ScaleData(obj, verbose=FALSE)
obj <- FindVariableFeatures(obj, selection.method=selectionMethod, nfeatures=nFeatures)
obj <- RunPCA(obj)
obj <- RunUMAP(obj, dims=dims)
obj <- FindNeighbors(obj, dims=dims)
obj <- FindClusters(obj, resolution=res, random.seed=1)
return(obj)
}
objs <- lapply(seq_along(objs), function(i){
obj <- objs[[i]]
seuratPreProcess(obj)
})
names(objs) <- samples
去双细胞,重新分群,注释细胞。
#####################################
# Doublet cells detect
#####################################
source('./ks_detectDoublet')
objs[[1]] <- ks_detectDoublet(objs[[1]],dims = 1:15,estDubRate=0.065,
ncores = 2, SCTransform=F, Homotypic=F, annotation="seurat_clusters")
objs[[2]] <- ks_detectDoublet(objs[[2]],dims = 1:15,estDubRate=0.025,
ncores = 2, SCTransform=F, Homotypic=F, annotation="seurat_clusters")
objs[[3]] <- ks_detectDoublet(objs[[3]],dims = 1:15,estDubRate=0.060,
ncores = 2, SCTransform=F, Homotypic=F, annotation="seurat_clusters")
#####################################
# re-cluster
#####################################
# Merge individual Seurat objects
obj <- merge(x=objs[[1]], y=objs[2:length(objs)], add.cell.ids=names(objs))
# Store sample information in metadata
obj$Sample <- obj$orig.ident
# Store disease status in metadata
# obj$diseaseStatus <- NA
# obj$diseaseStatus <- ifelse(grepl("C_SD", obj$Sample), "C_SD", obj$diseaseStatus)
# obj$diseaseStatus <- ifelse(grepl("C_PB", obj$Sample), "C_PB", obj$diseaseStatus)
# obj$diseaseStatus <- ifelse(grepl("AA", obj$Sample), "AA", obj$diseaseStatus)
# Remove doublets
obj <- subset(obj, subset = (DF.classify == "Singlet"))
#cluster
obj <- NormalizeData(obj, normalization.method = "LogNormalize") %>% FindVariableFeatures(., selection.method = "vst", nfeatures = 3000)
obj <- ScaleData(obj, vars.to.regress = c("percent.mt"), verbose = T)
obj <- RunPCA(obj, npcs = 50, verbose = FALSE)
#run harmony
library(harmony)
obj <- RunHarmony(obj, "orig.ident")
obj <- FindNeighbors(obj, reduction = "harmony",dims = 1:25)
obj <- FindClusters(object = obj , resolution = seq(from = 0.2, to = 1.0, by = 0.2))
obj <- RunUMAP(obj, dims = 1:25, reduction = "harmony")
# library(clustree)
# clustree(obj)
Idents(obj) <- "RNA_snn_res.0.4"
obj$seurat_clusters <- obj@active.ident
DimPlot(obj,label = T)
save(obj, file = 'obj.RData')
#####################################
# decontX
#####################################
source('./ks_runDecontX')
obj <- ks_runDecontX(seurat_obj = obj,idents = "seurat_clusters")
#根据contamination分数删除细胞,至于比例,没有明确的要求,可以按照实际数据处理来设定
obj = obj[,obj$estConp < 0.15]
#####################################
# cell annotation
#####################################
featureSets <- list(
"Keratinocytes" = c("KRT5", "KRT10", "KRT14", "KRT15"),
"Fibroblasts" = c("THY1", "COL1A1", "COL11A1"),
"T_cells" = c("CD3D", "CD8A", "CD4","IKZF2", "CCL5"), # PTPRC = CD45
"B_cells" = c("CD19"), # MS41A = CD20, MZB1 = marginal zone B cell specific protein
"APCs" = c("CD14", "CD86", "CD74", "CD163"), # Monocyte lineage (FCGR3A = CD16, FCGR1A = CD64, CD74 = HLA-DR antigens-associated invariant chain)
"Melanocytes" = c("MITF", "SOX10", "MLANA"), # Melanocyte markers
"Endothlial" = c("VWF", "PECAM1", "SELE"), # Endothlial cells (PECAM1 = CD31), SELE = selectin E (found in cytokine stimulated endothelial cells)
"Lymphatic" = c("FLT4", "LYVE1"), # Lymphatic endothelial (FLT4 = VEGFR-3))
"Muscle" = c("TPM1", "TAGLN", "MYL9"), # TPM = tropomyosin, TAGLN = transgelin (involved crosslinking actin in smooth muscle), MYL = Myosin light chain
"Mast_cells" = c("KIT", "TPSB2", "HPGD"), # KIT = CD117, ENPP3 = CD203c, FCER1A = IgE receptor alpha, TPSB2 is a beta tryptase, which are supposed to be common in mast cells
"HF_surface_markers" = c("ITGB8", "CD200", "SOX9", "LHX2")
)
markers <- unlist(featureSets,use.names = F)
DotPlot(obj, features = markers,col.min = 0)+coord_flip()
table(obj$seurat_clusters)
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
# 2713 2524 1875 1371 1685 1241 1107 1027 64 759 572 480 289 210 212 123 97 78 61 52
obj <- obj[,!obj@meta.data$seurat_clusters %in% c("8","18","19")]
new.cluster.ids <- c("0"="Fibroblasts",
"1"="Endothlial",
"2"="Keratinocytes",
"3"="Myeloid",
"4"="Keratinocytes",
"5"="Muscle",
"6"="Fibroblasts",
"7"="Muscle",
"9"="T cells",
"10"="Keratinocytes",
"11"="Mast cells",
"12"="Keratinocytes",
"13"="Myeloid",
"14"="Lymphatic",
"15"="Melanocytes",
"16"="Keratinocytes",
"17"="Myeloid")
obj <- RenameIdents(obj, new.cluster.ids)
obj$celltype <- obj@active.ident
DimPlot(obj, label = T)+NoLegend()
DimPlot(obj, label = T, split.by = 'orig.ident')+NoLegend()
save(obj, file = 'obj.RData')
关注我们获取精彩内容:
关注不迷路:扫描下面二维码关注公众号!
B站视频号链接:https://space.bilibili.com/471040659?spm_id_from=333.1007.0.0
关注 KS科研分享与服务,
认清正版优质内容和服务!
优质内容持续输出,物超所值!
合作联系:ks_account@163.com