-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #117 from theislab/dev
Dev
- Loading branch information
Showing
26 changed files
with
1,515 additions
and
459 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,18 @@ | ||
[build-system] | ||
requires = [ | ||
requires = ["hatchling"] | ||
build-backend = "hatchling.build" | ||
|
||
[project] | ||
name = "txsim" | ||
version = "0.1.2" | ||
description = "Python package to measure the similarity between matched single cell and targeted spatial transcriptomics data" | ||
authors = [ | ||
{ name = "Louis Kuemmerle", email = "[email protected]" }, | ||
{ name = "Habib Rehman", email = "[email protected]" } | ||
] | ||
readme = "README.md" | ||
requires-python = ">=3.8" | ||
dependencies = [ | ||
"setuptools", | ||
"wheel", | ||
"omnipath", | ||
|
@@ -10,6 +23,36 @@ requires = [ | |
"shapely", | ||
"scikit-image", | ||
"planktonspace", | ||
"geopandas" | ||
"geopandas", | ||
"rasterio", | ||
"anndata", | ||
"scanpy", | ||
"numpy", | ||
"pandas", | ||
"scipy", | ||
] | ||
|
||
[project.optional-dependencies] | ||
dev = [ | ||
"pytest", | ||
"pytest-cov", | ||
"flake8", | ||
"black", | ||
"mypy", | ||
"pre-commit", | ||
] | ||
build-backend = "setuptools.build_meta" | ||
|
||
[tool.hatch.build.targets.wheel] | ||
packages = ["txsim"] | ||
|
||
[tool.hatch.version] | ||
path = "txsim/__init__.py" | ||
|
||
[tool.black] | ||
line-length = 120 | ||
|
||
[tool.pytest.ini_options] | ||
filterwarnings = [ | ||
"ignore::DeprecationWarning:pkg_resources", | ||
"ignore::DeprecationWarning:xarray_schema" | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
|
||
### About simulated test data | ||
|
||
The test data is located in the `_data` directory. Some data files were generated by simulations. To run the tests faster, | ||
the simulation results are stored in the repository and are not regenerated on each run. If the simulation code changes, the test | ||
data should be updated. The data can be regenerated with the `generate_data.py` script: | ||
|
||
```bash | ||
cd _data | ||
python generate_data.py | ||
``` | ||
|
||
Note: this should only be done if you are sure that the simulation code is correct and the data should be updated. |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import scanpy as sc | ||
import txsim as tx | ||
|
||
if __name__ == "__main__":
    # Regenerates the simulated spatial AnnData file that the tests read.

    # Simulate spatial adata
    sim = tx.simulation.Simulation()
    sim.simulate_spatial_data(
        "IL32",
        n_groups=3,
        n_per_bin_and_ct=2,
        n_cols_cell_numb_increase=2,
        seed=0,
    )
    sim.simulate_exact_positions(spot_sampling_type='uniform')

    adata_sp = sim.adata_sp

    # Store the 'louvain' labels under the 'celltype' key and drop the old column
    adata_sp.obs['celltype'] = adata_sp.obs['louvain']
    del adata_sp.obs['louvain']

    # Filter genes with 0 counts (keep genes detected in at least one cell)
    sc.pp.filter_genes(adata_sp, min_cells=1)

    # The 'lognorm' layer is set to the same matrix as X
    adata_sp.layers['lognorm'] = adata_sp.X

    adata_sp.write("adata_sp_simulated.h5ad")
    #TODO: simulate image as well
    #TODO: adata_sp.uns['spots'].index looks weird -> clean up
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import pytest | ||
import anndata as ad | ||
import numpy as np | ||
|
||
@pytest.fixture
def adata_sp():
    """Return the simulated spatial AnnData read from the stored ``.h5ad`` test file."""
    return ad.read_h5ad("tests/_data/adata_sp_simulated.h5ad")
|
||
@pytest.fixture
def adata_sp_not_sparse():
    """Return the simulated spatial AnnData with ``X`` and all layers converted via ``toarray()``."""
    dense = ad.read_h5ad("tests/_data/adata_sp_simulated.h5ad").copy()

    # Replace the main matrix and then every layer by its ``toarray()`` result.
    dense.X = dense.X.toarray()
    for layer_name in dense.layers.keys():
        dense.layers[layer_name] = dense.layers[layer_name].toarray()

    return dense
|
||
@pytest.fixture
def adata_sc_high_sim():
    """Return an AnnData that is highly (but not perfectly) similar to adata_sp_simulated.

    Observations of the simulated spatial data are resampled with replacement
    (90% of ``n_obs`` draws, fixed seed), renamed to ``sc_<i>``, and stripped of
    the spatial-only ``obs`` columns and of ``uns["spots"]``.
    """
    adata = ad.read_h5ad("tests/_data/adata_sp_simulated.h5ad")

    # Fixed seed so that the resampled observations are reproducible.
    np.random.seed(0)
    n_draws = int(0.9 * adata.n_obs)
    sampled_obs = np.random.choice(adata.obs_names, size=n_draws, replace=True)

    adata = adata[sampled_obs]
    adata.obs.index = [f"sc_{i}" for i in range(adata.n_obs)]
    adata = adata.copy()  # Important after subsetting

    # Remove the spatial-only annotations.
    for spatial_key in ["x", "y", "n_spots", "grid_x", "grid_y", "area"]:
        del adata.obs[spatial_key]
    del adata.uns["spots"]

    return adata
|
||
@pytest.fixture
def adata_sc_high_sim_not_sparse(adata_sc_high_sim):
    """Return a copy of ``adata_sc_high_sim`` with ``X`` and all layers converted via ``toarray()``."""
    dense = adata_sc_high_sim.copy()

    # Replace the main matrix and then every layer by its ``toarray()`` result.
    dense.X = dense.X.toarray()
    for layer_name in dense.layers.keys():
        dense.layers[layer_name] = dense.layers[layer_name].toarray()

    return dense
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
import pytest | ||
import pandas as pd | ||
import txsim as tx | ||
|
||
#TODO: Add tests that check that sparse and non-sparse adata give the same results | ||
|
||
@pytest.mark.parametrize("adata_spatial", ["adata_sp", "adata_sp_not_sparse"])
def test_cell_density(adata_spatial, request):
    """Check the types, sign and internal consistency of ``cell_density`` outputs.

    ``adata_spatial`` is the name of a fixture; it is resolved through
    ``request`` so the test runs on both fixture variants.
    """
    adata_spatial = request.getfixturevalue(adata_spatial)
    density, density_per_celltype = tx.quality_metrics.cell_density(adata_spatial, pipeline_output=False)

    assert isinstance(density, float)
    assert isinstance(density_per_celltype, pd.Series)
    # >= 0 for all
    assert density >= 0
    assert (density_per_celltype >= 0).all()
    # Sum of cell type densities equals the total density. The sum is a
    # floating-point accumulation, so compare with a tolerance instead of
    # exact equality (rounding could otherwise make the test fail spuriously).
    assert density_per_celltype.sum() == pytest.approx(density)
|
||
|
||
@pytest.mark.parametrize("adata_spatial", ["adata_sp", "adata_sp_not_sparse"])
def test_proportion_of_assigned_reads(adata_spatial, request):
    """Check types, the [0, 1] range, index membership and sum consistency of the outputs."""
    adata = request.getfixturevalue(adata_spatial)
    total, per_gene, per_ct = tx.quality_metrics.proportion_of_assigned_reads(adata, pipeline_output=False)

    # Overall proportion: a float within [0, 1]
    assert isinstance(total, float)
    assert 0 <= total <= 1

    # Per-gene and per-cell-type proportions: Series with values within [0, 1]
    for proportions in (per_gene, per_ct):
        assert isinstance(proportions, pd.Series)
        assert (proportions >= 0).all()
        assert (proportions <= 1).all()

    # Index entries must be known genes / cell types
    known_celltypes = adata.obs["celltype"].unique()
    assert per_gene.index.isin(adata.var_names).all()
    assert per_ct.index.isin(known_celltypes).all()

    # Sum of cell type proportions equals total proportion
    assert per_ct.sum() == pytest.approx(total)
|
||
|
||
@pytest.mark.parametrize("adata_spatial, statistic", [
    ("adata_sp", "mean"),
    ("adata_sp", "median"),
    ("adata_sp_not_sparse", "mean"),
    ("adata_sp_not_sparse", "median")
])
def test_reads_per_cell(adata_spatial, statistic, request):
    """Check types, non-negativity, index membership and the per-gene bound of the outputs."""
    adata = request.getfixturevalue(adata_spatial)
    total, per_gene, per_ct = tx.quality_metrics.reads_per_cell(
        adata, statistic=statistic, pipeline_output=False
    )

    # Overall value: a non-negative float
    assert isinstance(total, float)
    assert total >= 0

    # Per-gene and per-cell-type values: non-negative Series
    for values in (per_gene, per_ct):
        assert isinstance(values, pd.Series)
        assert (values >= 0).all()

    # Index entries must be known genes / cell types
    known_celltypes = adata.obs["celltype"].unique()
    assert per_gene.index.isin(adata.var_names).all()
    assert per_ct.index.isin(known_celltypes).all()

    # Per gene <= total
    assert (per_gene <= total).all()
|
||
|
||
@pytest.mark.parametrize("adata_spatial, statistic", [
    ("adata_sp", "mean"),
    ("adata_sp", "median"),
    ("adata_sp_not_sparse", "mean"),
    ("adata_sp_not_sparse", "median")
])
def test_genes_per_cell(adata_spatial, statistic, request):
    """Check types, the [0, n_vars] range, index membership and the per-cell-type bracket."""
    adata = request.getfixturevalue(adata_spatial)
    total, per_ct = tx.quality_metrics.genes_per_cell(
        adata, statistic=statistic, pipeline_output=False
    )
    n_vars = adata.n_vars

    # Overall value: a float within [0, n_vars]
    assert isinstance(total, float)
    assert 0 <= total <= n_vars

    # Per-cell-type values: a Series within [0, n_vars]
    assert isinstance(per_ct, pd.Series)
    assert (per_ct >= 0).all()
    assert (per_ct <= n_vars).all()

    # Index entries must be known cell types
    assert per_ct.index.isin(adata.obs["celltype"].unique()).all()

    # The overall value lies between the smallest and largest per-cell-type value
    assert per_ct.min() <= total <= per_ct.max()
|
||
|
||
@pytest.mark.parametrize("adata_spatial", ["adata_sp", "adata_sp_not_sparse"])
def test_number_of_genes(adata_spatial, request):
    """Check types, non-negativity, index membership and the per-cell-type bound of the outputs."""
    adata = request.getfixturevalue(adata_spatial)
    total, per_ct = tx.quality_metrics.number_of_genes(adata, pipeline_output=False)

    # Overall count: a non-negative int
    assert isinstance(total, int)
    assert total >= 0

    # Per-cell-type counts: a non-negative Series
    assert isinstance(per_ct, pd.Series)
    assert (per_ct >= 0).all()

    # Index entries must be known cell types
    assert per_ct.index.isin(adata.obs["celltype"].unique()).all()

    # Per cell type <= total
    assert (per_ct <= total).all()
|
||
|
||
@pytest.mark.parametrize("adata_spatial", ["adata_sp", "adata_sp_not_sparse"])
def test_number_of_cells(adata_spatial, request):
    """Check types, non-negativity, index membership and sum consistency of the outputs."""
    adata = request.getfixturevalue(adata_spatial)
    total, per_ct = tx.quality_metrics.number_of_cells(adata, pipeline_output=False)

    # Overall count: a non-negative int
    assert isinstance(total, int)
    assert total >= 0

    # Per-cell-type counts: a non-negative Series
    assert isinstance(per_ct, pd.Series)
    assert (per_ct >= 0).all()

    # Index entries must be known cell types
    assert per_ct.index.isin(adata.obs["celltype"].unique()).all()

    # Sum of cell type counts equals total count
    assert per_ct.sum() == total
|
Oops, something went wrong.