1、Load Required Package
%time
# Silence all Python warnings for the rest of the session so library notices
# do not clutter the notebook output. A plain import replaces exec() on a code
# string: the effect on the module namespace and on the warning filters is the
# same, but the statement is now visible to linters and readers.
import warnings

warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import spateo as st
import numpy as np
import skimage
import sklearn
# Set spateo's global `n_threads` setting to 8 (presumably the number of
# threads used by its parallel steps — confirm against the spateo docs).
st.config.n_threads = 8
2、Required Data
进行细胞分割(圈细胞)需要准备两类数据:空间计数矩阵和 ssDNA 染色图像。测试数据可以从 MOSTA 数据库 https://db.cngb.org/stomics/mosta/ 下载。
2.1 Load Data
# Load the UMI count matrix and the nuclei-staining image into a single
# AnnData object, then print the shape of its "stain" layer.
adata = st.io.read_bgi_agg(
    "./E12.5_E1S3_GEM_bin1.tsv.gz",
    "./E12.5_E1S3.tif",
)
print(adata.layers["stain"].shape)
adata
|-----> Constructing count matrices.
|-----> <insert> __type to uns in AnnData Object.
|-----> <insert> pp to uns in AnnData Object.
|-----> <insert> spatial to uns in AnnData Object.
(22834, 17384)
AnnData object with n_obs × n_vars = 22834 × 17384
uns: '__type', 'pp', 'spatial'
layers: 'stain'
st.pl.imshow(adata, 'stain') ### Display the loaded ssDNA stain image
3、Cell Segmentation
3.1 Watershed-based 细胞核标记
# Build a nuclei mask from the stain image; the next call displays the
# "stain_mask" layer (presumably the default output layer of
# mask_nuclei_from_stain — confirm), and the plot is then saved to disk.
# NOTE(review): plt.savefig() writes the *current* figure. If imshow has
# already shown/closed its figure, the PNG may come out blank — verify.
st.cs.mask_nuclei_from_stain(adata)
st.pl.imshow(adata, "stain_mask")
plt.savefig("stain_mask.png", dpi=600)
plt.close()

# Peak finding on the mask followed by Watershed on the "stain" layer;
# the Watershed labels are stored in the "watershed_labels" layer.
# (The positional 7 and 5 are tuning parameters of the two calls.)
st.cs.find_peaks_from_mask(adata, "stain", 7)
st.cs.watershed(adata, "stain", 5, out_layer="watershed_labels")

# Draw the stain image, overlay the Watershed labels at 50 % opacity on the
# same axes, and save the result.
# NOTE(review): the file is called "waterfall.png" although it shows the
# Watershed labels — likely a typo; kept unchanged to preserve the path.
fig, ax = st.pl.imshow(adata, "stain", save_show_or_return="return")
st.pl.imshow(adata, "watershed_labels", labels=True, alpha=0.5, ax=ax)
plt.savefig("waterfall.png", dpi=600)
plt.close()
3.2 StarDist 细胞核标记
作者测试表明,与其他几种荧光细胞核分割方法相比,StarDist 的分割表现最稳定,推荐使用。
# Run StarDist with tilesize=2000 and equalize=2.0; the resulting labels go
# to the "stardist_labels" layer.
st.cs.stardist(
    adata,
    tilesize=2000,
    equalize=2.0,
    out_layer="stardist_labels",
)

# Overlay the StarDist labels (50 % opacity) on the stain image and save it.
# NOTE(review): plt.savefig() writes the current figure; if imshow has
# already shown/closed it, the PNG may be blank — verify the output.
fig, ax = st.pl.imshow(adata, "stain", save_show_or_return="return")
st.pl.imshow(adata, "stardist_labels", labels=True, alpha=0.5, ax=ax)
plt.savefig("fig5_stardist.png", dpi=600)
plt.close()
3.3 优化标记
Watershed 和 StarDist 方法在细胞核识别方面均表现良好,但它们各有局限性。由于阈值处理的性质,Watershed 方法往往会导致细胞边界粗糙,而 StarDist 有时难以识别密集区域中的细胞核。为缓解这些问题,需要用 Watershed 标签来增强 StarDist 标签:复制不与任何 StarDist 标签重叠的 Watershed 标签,并删除不与任何 Watershed 标签重叠的 StarDist 标签。
# Combine the Watershed and StarDist label layers into "augmented_labels".
st.cs.augment_labels(
    adata,
    "watershed_labels",
    "stardist_labels",
    out_layer="augmented_labels",
)

# Overlay the augmented labels (50 % opacity) on the stain image and save it.
# NOTE(review): the filename is spelled "Argmented_labels.png" (likely meant
# "Augmented"); kept unchanged to preserve the output path.
fig, ax = st.pl.imshow(adata, "stain", save_show_or_return="return")
st.pl.imshow(adata, "augmented_labels", labels=True, alpha=0.5, ax=ax)
plt.savefig("Argmented_labels.png", dpi=600)
plt.close()
4、将标记的细胞核区域扩展到细胞质
由于 ssDNA 染色对细胞质的染色较弱,这里将使用宽松阈值对图像进行阈值处理来识别细胞质区域。
# Build a cell mask from the stain image ("stain_cell_mask"), then run
# Watershed restricted to that mask with the augmented nuclei labels as
# markers; the cell-level labels are stored in "cell_labels".
st.cs.mask_cells_from_stain(adata, out_layer="stain_cell_mask")
st.cs.watershed(
    adata,
    "stain",
    mask_layer="stain_cell_mask",
    markers_layer="augmented_labels",
    out_layer="cell_labels",
)
fig, ax = st.pl.imshow(adata, "stain", save_show_or_return="return")
st.pl.imshow(adata, "cell_labels", labels=True, alpha=0.5, ax=ax)

# NOTE(review): this call expands "augmented_labels" without an explicit
# out_layer, and its result is not referenced by any later visible code —
# confirm whether it is still needed.
st.cs.expand_labels(adata, "augmented_labels", distance=5, max_area=400)

# Expand "cell_labels" by distance=2 into "cell_labels_expanded", which is
# the layer displayed below.
st.cs.expand_labels(
    adata,
    "cell_labels",
    distance=2,
    out_layer="cell_labels_expanded",
)
fig, ax = st.pl.imshow(adata, "stain", save_show_or_return="return")
st.pl.imshow(adata, "cell_labels_expanded", labels=True, alpha=0.5, ax=ax)
5、获取CELL BIN表达矩阵
# Re-read the bin1 count file, aggregating counts per segmented cell using
# the "cell_labels_expanded" layer of the segmentation AnnData.
cell_adata = st.io.read_bgi(
    "E12.5_E1S3_GEM_bin1.tsv.gz",
    segmentation_adata=adata,
    labels_layer="cell_labels_expanded",
)
cell_adata
|-----> <select> cell_labels_expanded layer in AnnData Object
|-----> Using labels provided with `segmentation_adata` and `labels_layer` arguments.
|-----> Constructing count matrices.
|-----> <insert> __type to uns in AnnData Object.
|-----> <insert> pp to uns in AnnData Object.
|-----> <insert> spatial to uns in AnnData Object.
AnnData object with n_obs × n_vars = 70475 × 26274
obs: 'area'
uns: '__type', 'pp', 'spatial'
obsm: 'spatial', 'contour', 'bbox'
layers: 'spliced', 'unspliced'
# Persist the cell-level AnnData object to "cell_adata_segmentation.pkl"
# with joblib.
# NOTE(review): joblib dumps are pickle-based — only load such files from
# trusted sources, and expect them to be sensitive to library versions.
import joblib
joblib.dump(cell_adata,"cell_adata_segmentation.pkl")