Tutorial 2: Data pre-processing for STARmap_PLUS data
STARmap PLUS enables high-resolution spatial transcriptomics along with protein detection in the same tissue section. The pre-processings aim to regist the extracellular proteins with cells.
[12]:
import os
import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Define the path and hyperparameters
[13]:
N = 3
threshold_tau = 15
threshold_plaque = 50
[14]:
name = '13months-disease-replicate_1_random.h5ad'
path = os.path.join('/home/wzk/ST_data/AD_mouse', name)
save_dir = '/home/wzk/ST_data/AD_mouse/'
rna_adata = sc.read_h5ad(path)
path_plaque = '/home/wzk/ST_data/AD_mouse/tau&abeta/plaque_13months-disease-replicate_1.csv'
tao_file = np.load('/home/wzk/ST_data/AD_mouse/tau&abeta/13months-disease-replicate_1_tau_center.npy')
plaque_file = pd.read_csv(path_plaque)
Plot RNA and MSI data in the original coodinates
[15]:
fig, axs = plt.subplots(1, 3, figsize=(20, 6))
sc.pl.embedding(rna_adata, basis='spatial', color='leiden', title=f'Expression of rna (scanpy)', ax=axs[0], show=False)
x_max = tao_file[:, 0].max()
axs[1].set_title('Expression of rna (matplotlib)')
axs[1].scatter(rna_adata.obsm['spatial'][:, 0], rna_adata.obsm['spatial'][:, 1], color='red', s=0.5) # 使用蓝色标记点
axs[2].set_title('Expression of plaque and tao (matplotlib)')
axs[2].scatter(plaque_file['m.cx'].values, plaque_file['m.cy'].values, color='black', s=5) # 使用蓝色标记点
axs[2].scatter(rna_adata.obsm['spatial'][:, 0], rna_adata.obsm['spatial'][:, 1], color='black', s=0.2, ) # 使用蓝色标记点
if name[0] == '8':
axs[2].scatter(x_max - tao_file[:, 0], tao_file[:, 1], color='green', s=0.5)
else:
axs[2].scatter(tao_file[:, 0], tao_file[:, 1], color='green', s=0.5)
plt.show()
/home/wzk/anaconda3/envs/pytorch_zk/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
cax = scatter(
[16]:
plaque_coordinates = np.array((plaque_file['m.cx'].values, plaque_file['m.cy'].values)).T
tao_coordinates = np.array((tao_file[:, 0], tao_file[:, 1])).T
cell_coordinates = rna_adata.obsm['spatial']
Calculate the distance matrix
[17]:
distance_matrix_plaque_cell = np.sqrt(((plaque_coordinates[:, np.newaxis, :] - cell_coordinates[np.newaxis, :, :]) ** 2).sum(axis=2))
distance_matrix_tao_cell = np.sqrt(((tao_coordinates[:, np.newaxis, :] - cell_coordinates[np.newaxis, :, :]) ** 2).sum(axis=2))
[18]:
temp_tao = np.argsort(distance_matrix_tao_cell, axis=1)[:, :N]#.reshape(-1)
temp_plaque = np.argsort(distance_matrix_plaque_cell, axis=1)[:, :N]#.reshape(-1)
Align proteins to cells according to the predefined distances
[19]:
mask_tao = (distance_matrix_tao_cell.min(axis=1) < threshold_tau)
mask_plaque = (distance_matrix_plaque_cell.min(axis=1) < threshold_plaque)
temp_tao = temp_tao[mask_tao, ].reshape(-1)
temp_plaque = temp_plaque[mask_plaque, ].reshape(-1)
[20]:
index_tao = np.zeros(rna_adata.shape[0])
for i in temp_tao.tolist():
index_tao[i] = 1
print(index_tao.sum())
index_plaque = np.zeros(rna_adata.shape[0])
for i in temp_plaque.tolist():
index_plaque[i] = 1
print(index_plaque.sum())
1116.0
429.0
[21]:
rna_adata.obs['tao'] = index_tao
rna_adata.obs['plaque'] = index_plaque
Visualize the aligned data
[22]:
fig, axs = plt.subplots(1, 2, figsize=(14, 6))
sc.pl.embedding(rna_adata, basis='spatial', color='tao', title=f'Expression of rna (scanpy)', ax=axs[0], show=False)
# sc.pl.embedding(rna_adata, basis='spatial', color='plaque', title=f'Expression of rna (scanpy)', ax=axs[1], show=False)
mask_tao = rna_adata.obs['tao'].values == 1
mask_plaque = rna_adata.obs['plaque'].values == 1
axs[1].scatter(rna_adata.obsm['spatial'][mask_tao, 0], rna_adata.obsm['spatial'][mask_tao, 1], color='green', s=0.5)
axs[1].scatter(rna_adata.obsm['spatial'][mask_plaque, 0], rna_adata.obsm['spatial'][mask_plaque, 1], color='black', s=3)
[22]:
<matplotlib.collections.PathCollection at 0x7f622f4dcf70>