线上课程教学
课题设计、定制生信分析
云服务器租赁
加微信备注99领取使用
跑代码时卡顿、电脑不给力让人抓狂!找果叔试用稳定高速的服务器,让分析顺畅无比!
代码学不会?bug 频繁出现,束手无策?实操生信分析课程赶快学起来!滴滴果叔领取体验课程哦~
线上课程教学
课题设计、定制生信分析
云服务器租赁
加微信备注99领取使用
# Create a dedicated conda environment named "SpatialGlue" pinned to Python 3.8.
conda create -n SpatialGlue python=3.8
# Activate the new environment so the following install goes into it.
conda activate SpatialGlue
# Install the SpatialGlue package with pip inside the active environment.
pip install SpatialGlue
##导入相关的包
import os
import torch
import pandas as pd
import scanpy as sc
import SpatialGlue
# Environment configuration. SpatialGlue can run on CPU or GPU; a GPU is
# strongly recommended for speed.
# The tutorial was written for a machine where GPU index 2 is the one to use.
# Requesting a non-existent index fails later with an invalid-device error,
# so fall back to GPU 0 when fewer than three GPUs are visible, and to the
# CPU when CUDA is not available at all.
preferred_gpu = 2
if torch.cuda.is_available():
    gpu_index = preferred_gpu if preferred_gpu < torch.cuda.device_count() else 0
    device = torch.device(f'cuda:{gpu_index}')
else:
    device = torch.device('cpu')
# Location of the R installation, required by the 'mclust' clustering
# algorithm. Replace the path below with your local R installation path.
os.environ['R_HOME'] = '/scbio4/tools/R/R-4.0.3_openblas/R-4.0.3'
# Load the two modalities (RNA and protein) from the dataset directory.
# NOTE: replace 'file_fold' with the path the dataset was downloaded to;
# the trailing slash matters because file names are appended directly.
from SpatialGlue.preprocess import fix_seed

file_fold = '/home/yahui/anaconda3/work/SpatialGlue_revision/data/Dataset3_Mouse_Thymus1/'
adata_omics1 = sc.read_h5ad(f'{file_fold}adata_RNA.h5ad')
adata_omics2 = sc.read_h5ad(f'{file_fold}adata_protein.h5ad')
# De-duplicate feature names in both modalities.
for _modality in (adata_omics1, adata_omics2):
    _modality.var_names_make_unique()

# Data type passed to the graph construction and training steps.
data_type = 'Stereo-CITE-seq'

# Fix the random seed so that repeated runs are reproducible.
random_seed = 2022
fix_seed(random_seed)
# Preprocess both modalities and store a PCA feature matrix for each one in
# .obsm['feat']. The statement order below is significant; do not reorder.
from SpatialGlue.preprocess import clr_normalize_each_cell, pca
# RNA
# Drop genes seen in fewer than 10 cells, then cells with fewer than 80 genes.
sc.pp.filter_genes(adata_omics1, min_cells=10)
sc.pp.filter_cells(adata_omics1, min_genes=80)
# Drop proteins seen in fewer than 50 cells.
sc.pp.filter_genes(adata_omics2, min_cells=50)
# Restrict the protein data to the cells that survived RNA filtering,
# in the same order as the RNA object.
adata_omics2 = adata_omics2[adata_omics1.obs_names].copy()
# Highly variable genes are selected BEFORE normalize_total/log1p, i.e. on
# the matrix as loaded (presumably raw counts, which is what the
# 'seurat_v3' flavor is meant for -- TODO confirm the input is unnormalized).
sc.pp.highly_variable_genes(adata_omics1, flavor="seurat_v3", n_top_genes=3000)
sc.pp.normalize_total(adata_omics1, target_sum=1e4)
sc.pp.log1p(adata_omics1)
# Subset to the highly variable genes only for the PCA input.
adata_omics1_high = adata_omics1[:, adata_omics1.var['highly_variable']]
# The number of RNA components is tied to the protein panel size
# (n_vars - 1), the same expression used for the protein PCA below.
adata_omics1.obsm['feat'] = pca(adata_omics1_high, n_comps=adata_omics2.n_vars-1)
# Protein
# Per-cell CLR normalization (presumably centered log-ratio -- see the
# SpatialGlue.preprocess implementation to confirm).
adata_omics2 = clr_normalize_each_cell(adata_omics2)
adata_omics2.obsm['feat'] = pca(adata_omics2, n_comps=adata_omics2.n_vars-1)
# Build the neighbor graphs that the model consumes from both modalities.
from SpatialGlue.preprocess import construct_neighbor_graph
data = construct_neighbor_graph(adata_omics1, adata_omics2, datatype=data_type)

# Define the model on the configured device.
from SpatialGlue.SpatialGlue_pyG import Train_SpatialGlue
model = Train_SpatialGlue(data, datatype=data_type, device=device)

# Train the model; the result is a mapping of named output arrays.
output = model.train()

# Attach every training output to a copy of the RNA object under the same
# key, so the downstream clustering and plotting work on one AnnData.
adata = adata_omics1.copy()
for _out_key in (
    'emb_latent_omics1',
    'emb_latent_omics2',
    'SpatialGlue',
    'alpha',
    'alpha_omics1',
    'alpha_omics2',
):
    adata.obsm[_out_key] = output[_out_key]
6. 跨组学整合分析
集成后,使用mclust进行聚类分析。在这里,我们提供了三种可选的聚类工具,包括 mclust、leiden 和 louvain。在我们的实验中,果叔发现在大多数情况下,“mclust”算法在空间数据上的表现优于“leiden”和“louvain”。因此,果叔建议使用“mclust”算法进行聚类。
# Cluster the integrated 'SpatialGlue' representation into 8 clusters and
# store the labels in adata.obs['SpatialGlue'].
# 'mclust' is the default clustering tool; 'leiden' and 'louvain' are the
# alternatives.
# The helper must be imported explicitly: without this import the call
# below raises NameError because nothing else in the script defines it.
from SpatialGlue.utils import clustering

tool = 'mclust'  # one of: 'mclust', 'leiden', 'louvain'
clustering(adata, key='SpatialGlue', add_key='SpatialGlue', n_clusters=8, method=tool, use_pca=True)
# Visualization: UMAP of the integrated embedding next to the spatial map,
# both colored by the SpatialGlue cluster label.
import matplotlib.pyplot as plt

# Negate the second spatial coordinate (flips the plot vertically).
# NOTE: this modifies adata.obsm['spatial'] in place and is not idempotent;
# running this section twice flips the coordinates back.
spatial_coords = adata.obsm['spatial']
spatial_coords[:, 1] = -1 * spatial_coords[:, 1]

# Neighbor graph and UMAP are computed on the SpatialGlue representation.
sc.pp.neighbors(adata, use_rep='SpatialGlue', n_neighbors=30)
sc.tl.umap(adata)

fig, ax_list = plt.subplots(nrows=1, ncols=2, figsize=(7, 3))
umap_ax, spatial_ax = ax_list
panel_style = dict(color='SpatialGlue', title='SpatialGlue', s=20, show=False)
sc.pl.umap(adata, ax=umap_ax, **panel_style)
sc.pl.embedding(adata, basis='spatial', ax=spatial_ax, **panel_style)

plt.tight_layout(w_pad=0.3)
plt.show()
# Annotation: replace the numeric cluster labels with tissue-region names.
# Keep the original numeric labels in a separate column first; they are
# needed later to line the modality weights up with the annotations.
adata.obs['SpatialGlue_number'] = adata.obs['SpatialGlue'].copy()

# Cluster number -> annotated region. The numbers are whatever the
# clustering step produced for this dataset/seed, so this mapping has to be
# revisited if the clustering changes.
cluster_annotation = {
    1: '5-Outer cortex region 3(DN T,DP T,cTEC)',
    2: '7-Subcapsular zone(DN T)',
    3: '4-Middle cortex region 2(DN T,DP T,cTEC)',
    4: '2-Corticomedullary Junction(CMJ)',
    5: '1-Medulla(SP T,mTEC,DC)',
    6: '6-Connective tissue capsule(fibroblast)',
    7: '8-Connective tissue capsule(fibroblast,RBC,myeloid)',
    8: '3-Inner cortex region 1(DN T,DP T,cTEC)',
}
# rename_categories() returns a new Series. Its `inplace=` argument was
# deprecated in pandas 1.3 and removed in pandas 2.0, so assign the result
# back instead of relying on in-place mutation.
adata.obs['SpatialGlue'] = adata.obs['SpatialGlue'].cat.rename_categories(cluster_annotation)

# Explicit category order used for the legend/colors in the plots.
list_ = [
    '3-Inner cortex region 1(DN T,DP T,cTEC)',
    '2-Corticomedullary Junction(CMJ)',
    '4-Middle cortex region 2(DN T,DP T,cTEC)',
    '7-Subcapsular zone(DN T)',
    '5-Outer cortex region 3(DN T,DP T,cTEC)',
    '8-Connective tissue capsule(fibroblast,RBC,myeloid)',
    '1-Medulla(SP T,mTEC,DC)',
    '6-Connective tissue capsule(fibroblast)',
]
adata.obs['SpatialGlue'] = pd.Categorical(adata.obs['SpatialGlue'],
                                          categories=list_,
                                          ordered=True)
# Plot again, now colored by the annotated region names.
# The neighbor graph and UMAP are recomputed here so this section does not
# depend on the earlier plotting section having been run.
sc.pp.neighbors(adata, use_rep='SpatialGlue', n_neighbors=30)
sc.tl.umap(adata)

fig, ax_list = plt.subplots(nrows=1, ncols=2, figsize=(9.5, 3))
umap_ax, spatial_ax = ax_list
sc.pl.umap(adata, color='SpatialGlue', ax=umap_ax, title='SpatialGlue', s=10, show=False)
sc.pl.embedding(adata, basis='spatial', color='SpatialGlue', ax=spatial_ax, title='SpatialGlue', s=20, show=False)

# Both panels share the same labels; keep only the legend on the spatial panel.
umap_ax.get_legend().remove()

plt.tight_layout(w_pad=0.3)
plt.show()
# Renumber the raw cluster labels so that they follow the annotation
# numbering (raw cluster 5 -> 1, 4 -> 2, 8 -> 3, ...). The modality weights
# plotted afterwards are grouped by this column.
# list_SpatialGlue[i] is the raw cluster number that becomes label i + 1.
list_SpatialGlue = [5, 4, 8, 3, 1, 6, 2, 7]
adata.obs['SpatialGlue_number'] = pd.Categorical(adata.obs['SpatialGlue_number'],
                                                 categories=list_SpatialGlue,
                                                 ordered=True)
# Derive the old -> new mapping from the list above so the two cannot get
# out of sync: {5: 1, 4: 2, 8: 3, 3: 4, 1: 5, 6: 6, 2: 7, 7: 8}.
renumbering = {old: new for new, old in enumerate(list_SpatialGlue, start=1)}
# rename_categories() returns a new Series. Its `inplace=` argument was
# deprecated in pandas 1.3 and removed in pandas 2.0, so assign the result
# back instead of relying on in-place mutation.
adata.obs['SpatialGlue_number'] = adata.obs['SpatialGlue_number'].cat.rename_categories(renumbering)
# Plot the modality weights: for every cluster label, a split violin
# comparing the RNA weight and the protein weight.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

plt.rcParams['figure.figsize'] = (5, 3)

# Column 0 of 'alpha' is used as the RNA weight and column 1 as the
# protein weight; 'label' is the renumbered cluster id.
modality_weights = adata.obsm['alpha']
df = pd.DataFrame({
    'RNA': modality_weights[:, 0],
    'protein': modality_weights[:, 1],
    'label': adata.obs['SpatialGlue_number'].values,
})
# Reshape to long format: one row per (label, modality) with its weight.
df = df.set_index('label').stack().reset_index()
df.columns = ['label_SpatialGlue', 'Modality', 'Weight value']

# `show=False` is a scanpy plotting argument, not a seaborn one. Recent
# seaborn releases forward unknown keyword arguments to matplotlib, where
# it is rejected, so it is not passed here.
ax = sns.violinplot(data=df, x='label_SpatialGlue', y='Weight value', hue="Modality",
                    split=True, inner="quart", linewidth=1)
ax.set_title('RNA vs protein')
ax.set_xlabel('SpatialGlue label')
# Place the legend outside the axes so it does not cover the violins.
ax.legend(bbox_to_anchor=(1.4, 1.01), loc='upper right')

plt.tight_layout(w_pad=0.05)
# Display the figure, consistent with the other plotting sections.
plt.show()
果叔还提供思路设计、定制生信分析、文献思路复现;有需要的小伙伴欢迎直接扫码咨询果叔,竭诚为您的科研助力!
定制生信分析
服务器租赁
扫码咨询果叔
往期回顾
01 “made in China”嘎嘎好用!中国人自己的数据库就是香!暨南大学:CHARLS+Cox回归分析,8.5分信手拈来超牛! |
02 |
03 IF=58.7,这泼天的多组学富贵可得接住!系统生物学研究团队开挂思路,机器学习助力个性化医疗,你就学吧,一看一个不吱声! |
04 |