复现:Lineage-specific regulatory changes in hypertrophic cardiomyopathy unraveled by single-nucleus RNA-seq and spatial transcriptomics.Cell discovery.2023
import stlearn as st
import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import os
os.getcwd()
# 读取空转数据
adata_dct = {}
for i in Path("../").glob("rawdata_st/*"):
_s = str(i).split('_')[2]
_a = sc.read_visium(i,library_id=_s)
_a.obs.index = [_s + "_" + bc[0:len(bc)] for bc in _a.obs.index.tolist()]
_a.var_names_make_unique()
adata_dct[_s] = _a
st_adata = sc.concat(adata_dct,label="library_id",uns_merge="unique")
st.pp.filter_genes(st_adata, min_cells=5)
st.pp.normalize_total(st_adata)
# 载入cell2location或RCTD注释文件
spot_mixtures = pd.read_csv('../st_cell2location_res.csv', index_col=0) # 或RCTD文件
# 修改字符
spot_mixtures['predicted.id'] = spot_mixtures['predicted.id'].str.replace('q05cell_abundance_w_sf_', '')
labels = spot_mixtures.loc[:,'predicted.id'].values.astype(str)
spot_mixtures = spot_mixtures.drop(['predicted.id','max.score'],axis=1)
spot_mixtures.columns = [col.replace('q05cell_abundance_w_sf_', '')
for col in spot_mixtures.columns]
spot_mixtures = spot_mixtures.loc[st_adata.obs.index,:]
# 查看细胞名是否一致
print('Spot mixture order correct?: ',
np.all(spot_mixtures.index.values==st_adata.obs_names.values))
st_adata.obs['cell_type'] = labels
st_adata.obs['cell_type'] = st_adata.obs['cell_type'].astype('category')
st_adata.uns['cell_type'] = spot_mixtures
fig, axs = plt.subplots(1, 5, figsize=(20, 10))
for i, library in enumerate(
['D1','D2',"H1","D3","H2"]
):
ad = st_adata[st_adata.obs.library_id == library, :].copy()
sc.pl.spatial(
ad,
img_key="hires",
library_id=library,
color=['cell_type'],
size=1.5,
color_map='magma',
ax=axs[i],
legend_loc='right margin',
frameon=False
)
plt.tight_layout()
lrs = st.tl.cci.load_lrs(['connectomeDB2020_lit'], species='human')
print(len(lrs))
2293
R
CellChatDB <- CellChatDB.human # set CellChatDB <- CellChatDB.mouse 调用小鼠数据库
interaction_input <- CellChatDB$interaction
complex_input <- CellChatDB$complex
cofactor_input <- CellChatDB$cofactor
geneInfo <- CellChatDB$geneInfo
write.csv(interaction_input, file = "interaction_input_CellChatDB.csv")
write.csv(complex_input, file = "complex_input_CellChatDB.csv")
write.csv(cofactor_input, file = "cofactor_input_CellChatDB.csv")
write.csv(geneInfo, file = "geneInfo_input_CellChatDB.csv")
lrs_cc = pd.read_csv('../interaction_input_CellChatDB.csv', index_col=0)
# stlearn目前配受体信息不能用多聚体形式,所以cellchat信息需自己调整
lrs_cc.index=lrs_cc['pathway_name']
# 选取特定通路上的配受体
lrs_cc = lrs_cc.query('pathway_name == "FGF"| pathway_name== "CXCL"|pathway_name== "NOTCH"')
lrs_cc = lrs_cc.loc[:,'interaction_name'].values.astype(str)
# 修改基因名-解决报错
st_adata.var.index =st_adata.var.index.str.replace('_', '')
# 转化对象-解决报错
st_adata = st.convert_scanpy(st_adata)
# 运行配体-受体分析
st.tl.cci.run(st_adata, lrs_cc,
min_spots = 5, #Filter out any LR pairs with no scores for less than min_spots
distance=None, # None defaults to spot+immediate neighbours; distance=0 for within-spot mode
n_pairs=10000, # Number of random pairs to generate; low as example, recommend ~10,000
n_cpus=4, # Number of CPUs for parallel. If None, detects & use all available.
)
lr_info = st_adata.uns['lr_summary']
# P值调整
st.tl.cci.adj_pvals(st_adata, correct_axis='spot',
pval_adj_cutoff=0.05, adj_method='fdr_bh')
lr= pd.DataFrame(st_adata.obsm['lr_scores'],index=st_adata.obs.index,columns=st_adata.uns['lr_summary'].index)
lr['interaction_score'] = lr.apply(lambda x: x.sum(), axis=1)
st_adata.X.todense()[:,0]
st_adata[:,0].X = csr_matrix(lr['interaction_score']).T
st_adata.X.todense()[:,0]
# 将score值替换某个基因表达量
st_adata.X.todense()[:,0]
st_adata[:,0].X = csr_matrix(lr['interaction_score']).T
st_adata.X.todense()[:,0]
st_adata.var.index[0]
# 提取其中某一样本进行可视化
ad = st_adata[st_adata.obs.library_id == 'H1', :].copy()
a = ad.uns.get('spatial').get('H1')
ad.uns['spatial'] = {}
ad.uns['spatial']['H1'] = a
st.pl.gene_plot(ad, gene_symbols=st_adata.var.index[0], contour=True,cell_alpha=0.5)