Skip to content

Commit

Permalink
Merge pull request #117 from theislab/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
LouisK92 authored Sep 25, 2024
2 parents ae2ab8d + f1fd322 commit 685b05f
Show file tree
Hide file tree
Showing 26 changed files with 1,515 additions and 459 deletions.
49 changes: 46 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
[build-system]
requires = [
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "txsim"
version = "0.1.2"
description = "Python package to measure the similarity between matched single cell and targeted spatial transcriptomics data"
authors = [
{ name = "Louis Kuemmerle", email = "[email protected]" },
{ name = "Habib Rehman", email = "[email protected]" }
]
readme = "README.md"
requires-python = ">=3.8"
dependencies = [
"setuptools",
"wheel",
"omnipath",
Expand All @@ -10,6 +23,36 @@ requires = [
"shapely",
"scikit-image",
"planktonspace",
"geopandas"
"geopandas",
"rasterio",
"anndata",
"scanpy",
"numpy",
"pandas",
"scipy",
]

[project.optional-dependencies]
dev = [
"pytest",
"pytest-cov",
"flake8",
"black",
"mypy",
"pre-commit",
]
build-backend = "setuptools.build_meta"

[tool.hatch.build.targets.wheel]
packages = ["txsim"]

[tool.hatch.version]
path = "txsim/__init__.py"

[tool.black]
line-length = 120

[tool.pytest.ini_options]
filterwarnings = [
"ignore::DeprecationWarning:pkg_resources",
"ignore::DeprecationWarning:xarray_schema"
]
7 changes: 0 additions & 7 deletions setup.cfg

This file was deleted.

3 changes: 0 additions & 3 deletions setup.py

This file was deleted.

14 changes: 14 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@


### About simulated test data

The test data is located in the `_data` directory. Some data files were generated by simulations. To keep the tests fast,
the simulation results are stored in the repository rather than regenerated on every run. If the simulation code changes, the test
data should be updated. The data can be regenerated with the `generate_data.py` script:

```bash
cd _data
python generate_data.py
```

Note: this should only be done if you are sure that the simulation code is correct and the data should be updated.
Binary file added tests/_data/adata_sp_simulated.h5ad
Binary file not shown.
17 changes: 17 additions & 0 deletions tests/_data/generate_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import scanpy as sc
import txsim as tx

def main():
    """Simulate a small spatial AnnData object and write it to ``adata_sp_simulated.h5ad``.

    The output file is written to the current working directory.
    """
    # Simulate spatial adata
    sim = tx.simulation.Simulation()
    sim.simulate_spatial_data("IL32", n_groups=3, n_per_bin_and_ct=2, n_cols_cell_numb_increase=2, seed=0)
    sim.simulate_exact_positions(spot_sampling_type='uniform')

    adata_sp = sim.adata_sp

    # Expose the 'louvain' annotation under the column name 'celltype'
    adata_sp.obs['celltype'] = adata_sp.obs['louvain']
    del adata_sp.obs['louvain']

    # Filter genes with 0 counts
    sc.pp.filter_genes(adata_sp, min_cells=1)

    # The 'lognorm' layer is set to the same matrix as X
    adata_sp.layers['lognorm'] = adata_sp.X
    adata_sp.write("adata_sp_simulated.h5ad")
    #TODO: simulate image as well
    #TODO: adata_sp.uns['spots'].index looks weird -> clean up


if __name__ == "__main__":
    main()
39 changes: 39 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from pathlib import Path

import anndata as ad
import numpy as np
import pytest

@pytest.fixture
def adata_sp():
    """Load the simulated spatial AnnData stored in ``_data/adata_sp_simulated.h5ad``.

    The file is located relative to this conftest.py rather than the current working directory, so the
    fixture works no matter where pytest is invoked from.
    """
    return ad.read_h5ad(Path(__file__).parent / "_data" / "adata_sp_simulated.h5ad")

@pytest.fixture
def adata_sp_not_sparse():
    """Load the simulated spatial AnnData with ``X`` and every layer converted via ``.toarray()``.

    The file is located relative to this conftest.py rather than the current working directory, so the
    fixture works no matter where pytest is invoked from.
    """
    # A freshly read object is not shared with anything else, so no extra copy is needed before mutating it.
    adata = ad.read_h5ad(Path(__file__).parent / "_data" / "adata_sp_simulated.h5ad")
    adata.X = adata.X.toarray()
    # Snapshot the keys first so the mapping is not iterated while its values are being replaced.
    for key in list(adata.layers.keys()):
        adata.layers[key] = adata.layers[key].toarray()
    return adata

@pytest.fixture
def adata_sc_high_sim():
    """adata with high (but not perfect) similarity to adata_sp_simulated

    Built by resampling observations of the simulated spatial data (with replacement, 90% of ``n_obs``,
    fixed seed), renaming them to ``sc_<i>`` and removing the spatial-only annotations.

    The file is located relative to this conftest.py rather than the current working directory, so the
    fixture works no matter where pytest is invoked from.
    """
    adata = ad.read_h5ad(Path(__file__).parent / "_data" / "adata_sp_simulated.h5ad")
    # Fixed seed keeps the resampled observations identical between test runs.
    np.random.seed(0)
    obs = np.random.choice(adata.obs_names, size=int(0.9*adata.n_obs), replace=True)
    adata = adata[obs]
    adata.obs.index = [f"sc_{i}" for i in range(adata.n_obs)]
    adata = adata.copy() # Important after subsetting
    # Remove the annotations that only exist for spatial data.
    for key in ["x","y","n_spots","grid_x","grid_y","area"]:
        del adata.obs[key]
    del adata.uns["spots"]
    return adata

@pytest.fixture
def adata_sc_high_sim_not_sparse(adata_sc_high_sim):
    """Copy of ``adata_sc_high_sim`` with ``X`` and every layer converted via ``.toarray()``."""
    dense = adata_sc_high_sim.copy()
    dense.X = dense.X.toarray()
    for layer_name in dense.layers.keys():
        layer = dense.layers[layer_name]
        dense.layers[layer_name] = layer.toarray()
    return dense
126 changes: 126 additions & 0 deletions tests/test_quality_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import pytest
import pandas as pd
import txsim as tx

#TODO: Add tests that check if sparse and non-sparse adata give the same results

@pytest.mark.parametrize("adata_spatial", ["adata_sp", "adata_sp_not_sparse"])
def test_cell_density(adata_spatial, request):
    """Check types, non-negativity and the per-cell-type sum of ``cell_density``.

    ``adata_spatial`` is the name of a fixture; it is resolved through ``request``.
    """
    adata_spatial = request.getfixturevalue(adata_spatial)
    density, density_per_celltype = tx.quality_metrics.cell_density(adata_spatial, pipeline_output=False)
    assert isinstance(density, float)
    assert isinstance(density_per_celltype, pd.Series)
    assert density >= 0
    # Summing floats is subject to rounding, so compare with a tolerance instead of exact equality.
    assert density_per_celltype.sum() == pytest.approx(density)
    assert (density_per_celltype >= 0).all()


@pytest.mark.parametrize("adata_spatial", ["adata_sp", "adata_sp_not_sparse"])
def test_proportion_of_assigned_reads(adata_spatial, request):
    """Check types, [0, 1] bounds, index labels and the per-cell-type sum of ``proportion_of_assigned_reads``.

    ``adata_spatial`` is the name of a fixture; it is resolved through ``request``.
    """
    adata = request.getfixturevalue(adata_spatial)
    result = tx.quality_metrics.proportion_of_assigned_reads(adata, pipeline_output=False)
    total, per_gene, per_ct = result

    assert isinstance(total, float)
    assert isinstance(per_gene, pd.Series)
    assert isinstance(per_ct, pd.Series)

    # Lower bound: nothing may be negative
    assert total >= 0
    assert (per_gene >= 0).all()
    assert (per_ct >= 0).all()

    # Upper bound: proportions cannot exceed 1
    assert total <= 1
    assert (per_gene <= 1).all()
    assert (per_ct <= 1).all()

    # Index labels must be genes / cell types present in the input
    gene_names = adata.var_names
    assert per_gene.index.isin(gene_names).all()
    celltypes = adata.obs["celltype"].unique()
    assert per_ct.index.isin(celltypes).all()

    # Cell type proportions add up to the total proportion
    assert per_ct.sum() == pytest.approx(total)


@pytest.mark.parametrize(
    "adata_spatial, statistic",
    [
        ("adata_sp", "mean"),
        ("adata_sp", "median"),
        ("adata_sp_not_sparse", "mean"),
        ("adata_sp_not_sparse", "median"),
    ],
)
def test_reads_per_cell(adata_spatial, statistic, request):
    """Check types, non-negativity, index labels and the per-gene bound of ``reads_per_cell``.

    ``adata_spatial`` is the name of a fixture; it is resolved through ``request``.
    """
    adata = request.getfixturevalue(adata_spatial)
    result = tx.quality_metrics.reads_per_cell(adata, statistic=statistic, pipeline_output=False)
    total, per_gene, per_ct = result

    assert isinstance(total, float)
    assert isinstance(per_gene, pd.Series)
    assert isinstance(per_ct, pd.Series)

    # Nothing may be negative
    assert total >= 0
    assert (per_gene >= 0).all()
    assert (per_ct >= 0).all()

    # Index labels must be genes / cell types present in the input
    gene_names = adata.var_names
    assert per_gene.index.isin(gene_names).all()
    celltypes = adata.obs["celltype"].unique()
    assert per_ct.index.isin(celltypes).all()

    # No single gene may exceed the total
    assert (per_gene <= total).all()


@pytest.mark.parametrize(
    "adata_spatial, statistic",
    [
        ("adata_sp", "mean"),
        ("adata_sp", "median"),
        ("adata_sp_not_sparse", "mean"),
        ("adata_sp_not_sparse", "median"),
    ],
)
def test_genes_per_cell(adata_spatial, statistic, request):
    """Check types, bounds, index labels and the per-cell-type range of ``genes_per_cell``.

    ``adata_spatial`` is the name of a fixture; it is resolved through ``request``.
    """
    adata = request.getfixturevalue(adata_spatial)
    result = tx.quality_metrics.genes_per_cell(adata, statistic=statistic, pipeline_output=False)
    total, per_ct = result

    assert isinstance(total, float)
    assert isinstance(per_ct, pd.Series)

    # Nothing may be negative
    assert total >= 0
    assert (per_ct >= 0).all()

    # Nothing may exceed the number of genes in the data
    n_vars = adata.n_vars
    assert total <= n_vars
    assert (per_ct <= n_vars).all()

    # Index labels must be cell types present in the input
    celltypes = adata.obs["celltype"].unique()
    assert per_ct.index.isin(celltypes).all()

    # The overall value lies between the smallest and largest per-cell-type value
    smallest = per_ct.min()
    assert (smallest <= total)
    largest = per_ct.max()
    assert (largest >= total)


@pytest.mark.parametrize("adata_spatial", ["adata_sp", "adata_sp_not_sparse"])
def test_number_of_genes(adata_spatial, request):
    """Check types, non-negativity, index labels and the per-cell-type bound of ``number_of_genes``.

    ``adata_spatial`` is the name of a fixture; it is resolved through ``request``.
    """
    adata = request.getfixturevalue(adata_spatial)
    result = tx.quality_metrics.number_of_genes(adata, pipeline_output=False)
    total, per_ct = result

    assert isinstance(total, int)
    assert isinstance(per_ct, pd.Series)

    # Nothing may be negative
    assert total >= 0
    assert (per_ct >= 0).all()

    # Index labels must be cell types present in the input
    celltypes = adata.obs["celltype"].unique()
    assert per_ct.index.isin(celltypes).all()

    # No cell type may have more genes than the total
    assert (per_ct <= total).all()


@pytest.mark.parametrize("adata_spatial", ["adata_sp", "adata_sp_not_sparse"])
def test_number_of_cells(adata_spatial, request):
    """Check types, non-negativity, index labels and the per-cell-type sum of ``number_of_cells``.

    ``adata_spatial`` is the name of a fixture; it is resolved through ``request``.
    """
    adata = request.getfixturevalue(adata_spatial)
    result = tx.quality_metrics.number_of_cells(adata, pipeline_output=False)
    total, per_ct = result

    assert isinstance(total, int)
    assert isinstance(per_ct, pd.Series)

    # Nothing may be negative
    assert total >= 0
    assert (per_ct >= 0).all()

    # Index labels must be cell types present in the input
    celltypes = adata.obs["celltype"].unique()
    assert per_ct.index.isin(celltypes).all()

    # Cell type counts add up to the total count
    assert per_ct.sum() == total

Loading

0 comments on commit 685b05f

Please sign in to comment.