[1]:
import scanpy as sc
import pandas as pd
from anndata import AnnData
from pathlib import Path
import json
from matplotlib.image import imread

from step import scModel, stModel
from step.utils.misc import read_visium_hd

sc.set_figure_params(dpi=150, figsize=(6, 4.5))
/projects/82505004-e7a0-445f-ab3c-80d03c91438f/.cache/pypoetry/virtualenvs/step-Ajq_Bw_i-py3.10/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

Visium HD Human Colorectal Cancer (16 um) cell type clustering & spatial domain identification

16 um bin size: cell type clustering

[ ]:
adata = read_visium_hd("./data/visium-hd/human-coloretal-cancer/square_016um/")
[3]:
stepc = scModel(
    adata=adata,
    n_top_genes=2000,
)
Trying seurat_v3 for hvgs
not log_transformed
Adding count data to layer 'counts'
================Dataset Info================
Batch key: None
Class key: None
Number of Batches: 1
Number of Classes: None
Gene Expr: (136984, 2000)
============================================
[4]:
stepc.run(epochs=400, batch_size=2048, beta=1e-3)
Performing global random split
Current Mode: single_batch: ['gene_expr']
 50%|█████     | 202/400 [18:05<17:37,  5.34s/epoch, kl_loss=0.956, recon_loss=460.729, val_kl_loss=0.967/0.498, val_recon_loss=458.524/457.657]Early Stopping triggered
 50%|█████     | 202/400 [18:05<17:43,  5.37s/epoch, kl_loss=0.956, recon_loss=460.729, val_kl_loss=0.967/0.498, val_recon_loss=458.524/457.657]
EarlyStopping counter: 30 out of 30
EarlyStopping counter: 202 out of 10
[5]:
adata = stepc.adata

sc.pp.neighbors(adata, use_rep='X_rep', n_neighbors=60)
sc.tl.umap(adata)
[6]:
sc.tl.leiden(adata)
sc.pl.umap(adata, color='leiden')
../_images/notebooks_human_colorectal_cancer_7_0.png
[7]:
sc.pl.spatial(adata, color='leiden')
../_images/notebooks_human_colorectal_cancer_8_0.png
[8]:
stepc.save("./results/visium-hd/hcc-16um/")
adata.write_h5ad("./results/visium-hd/hcc_16um.h5ad")
Saving model...
Saving model config...
Saving dataset config...
[2]:
adata = sc.read_h5ad("./results/visium-hd/hcc_16um.h5ad")
# sc.set_figure_params(dpi=300, dpi_save=300)
# sc.settings.figdir = './results/visium-hd/hcc-16um/'
[6]:
sc.pl.spatial(adata, color='leiden', frameon=False, groups=['7', '11', '14', '2', '9', '10'], na_in_legend=False)
../_images/notebooks_human_colorectal_cancer_11_0.png
[3]:
sc.settings.figdir = "./results/visium-hd/hcc-16um/"
sc.set_figure_params(dpi_save=300)
[4]:
sc.tl.rank_genes_groups(adata, groupby='leiden', dendrogram=False, use_raw=True)
sc.tl.dendrogram(adata, use_raw=True, groupby='leiden')
WARNING: You’re trying to run this on 2000 dimensions of `.X`, if you really want this, set `use_rep='X'`.
         Falling back to preprocessing with `sc.pp.pca` and default params.
[5]:
sc.pl.rank_genes_groups_matrixplot(adata,
                                   groupby='leiden',
                                   values_to_plot='logfoldchanges',
                                   cmap='RdBu_r',
                                   vmin=-4, vmax=4,
                                   save='16um_leiden.svg')
WARNING: saving figure to file results/visium-hd/hcc-16um/matrixplot_16um_leiden.svg
../_images/notebooks_human_colorectal_cancer_14_1.png
[9]:
sc.pl.rank_genes_groups_stacked_violin(adata,
                                       groupby='leiden',
                                       save='_16um_leiden.svg')
WARNING: saving figure to file results/visium-hd/hcc-16um/stacked_violin__16um_leiden.svg
../_images/notebooks_human_colorectal_cancer_15_1.png
[12]:
sc.pl.rank_genes_groups_heatmap(adata,
                                groupby='leiden',
                                n_genes=10,
                                swap_axes=True,
                                vmax=4,
                                show_gene_labels=True,
                                save='_16um_leiden.svg')
WARNING: saving figure to file results/visium-hd/hcc-16um/heatmap_16um_leiden.svg
../_images/notebooks_human_colorectal_cancer_16_1.png

16 um bin size: spatial domain identification

[2]:
adata = read_visium_hd("./data/visium-hd/human-coloretal-cancer/square_016um/")
[3]:
stepc = stModel(
    adata=adata,
    n_top_genes=3000,
    edge_clip=2,
    n_glayers=3,
)
Trying seurat_v3 for hvgs
not log_transformed
Adding count data to layer 'counts'
Dataset Done
================Dataset Info================
Batch key: None
Class key: None
Number of Batches: 1
Number of Classes: None
Gene Expr: (136984, 3000)
============================================
[4]:
stepc.run(n_iterations=4000, n_samples=2048, beta=1e-5)
Training with e2e pattern
Training graph with single batch
Constructing graph for batch
100%|██████████| 4000/4000 [06:14<00:00, 10.67step/s, recon_loss=1340.246, kl_loss=0.002, contrast_loss=1.701, graph_ids=None]
[5]:
stepc.cluster(n_clusters=12)
stepc.spatial_plot(color='domain',)
stepc.save("./results/visium-hd/hccspatial_plotomain/")
../_images/notebooks_human_colorectal_cancer_21_0.png
[8]:
stepc = stModel.load("./results/visium-hd/hccspatial_plotomain/",
                           filepath="./results/visium-hd/hcc_16um_domain.h5ad")
stepc.adata.obsm['X_smoothed'] = stepc.gembed()
stepc.cluster(n_clusters=8)
stepc.spatial_plot(color='domain',)
Trying seurat_v3 for hvgs
Adding count data to layer 'counts'
Dataset Done
================Dataset Info================
Batch key: None
Class key: None
Number of Batches: 1
Number of Classes: None
Gene Expr: (136984, 3000)
============================================
Loading backbone model...
Backbone model loaded.
../_images/notebooks_human_colorectal_cancer_22_1.png
[9]:
stepc.adata.write_h5ad("./results/visium-hd/hcc_16um_domain.h5ad")
[2]:
adata = sc.read_h5ad("./results/visium-hd/hcc_16um_domain.h5ad")
[3]:
sc.settings.figdir = "./results/visium-hd/hcc-16um/"
sc.set_figure_params(dpi_save=300, figsize=(12, 9))

sc.pl.spatial(adata, color='domain', frameon=False, show=False, save='_16um_domain.svg',)
[4]:
adata_raw = adata.raw.to_adata()
sc.pp.log1p(adata_raw)
adata.raw = adata_raw
[5]:
# adata = stepc.adata

sc.tl.rank_genes_groups(adata, groupby='domain', dendrogram=False, use_raw=True)
sc.tl.dendrogram(adata, use_raw=True, groupby='domain')
WARNING: You’re trying to run this on 3000 dimensions of `.X`, if you really want this, set `use_rep='X'`.
         Falling back to preprocessing with `sc.pp.pca` and default params.
[22]:
sc.pl.rank_genes_groups_matrixplot(adata,
                                   groupby='domain',
                                   values_to_plot='logfoldchanges',
                                   cmap='RdBu_r',
                                   vmin=-4, vmax=4,
                                   save='16um_domain.svg')
WARNING: saving figure to file results/visium-hd/hcc-16um/matrixplot__16um_domain.svg
../_images/notebooks_human_colorectal_cancer_28_1.png
[7]:
sc.pl.rank_genes_groups_stacked_violin(adata,
                                       groupby='domain',
                                       vmax=4,
                                       save='16um_domain.svg')
WARNING: saving figure to file results/visium-hd/hcc-16um/stacked_violin_16um_domain.svg
../_images/notebooks_human_colorectal_cancer_29_1.png
[23]:
sc.pl.rank_genes_groups_heatmap(adata,
                                groupby='domain',
                                n_genes=10,
                                swap_axes=True,
                                vmax=4,
                                show_gene_labels=True,
                                save='_16um_domain.svg')
WARNING: saving figure to file results/visium-hd/hcc-16um/heatmap_16um_domain.svg
../_images/notebooks_human_colorectal_cancer_30_1.png
[ ]: