From f073e7bf2c0f732a77f35547c8f31f4025e7ee57 Mon Sep 17 00:00:00 2001 From: Sai Nirmayi Yasa <92786623+sainirmayi@users.noreply.github.com> Date: Wed, 17 Apr 2024 16:39:52 +0530 Subject: [PATCH] Fix dataset simulator (#431) * Explicitly convert non-string objects in .obs slots to categoricals * Add info about fix in comments Former-commit-id: 624c44219ad04e350e5eb38ab2beb4b2c9179a03 --- .../dataset_simulator/script.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/tasks/spatial_decomposition/dataset_simulator/script.py b/src/tasks/spatial_decomposition/dataset_simulator/script.py index 48daafd374..901d7def5f 100644 --- a/src/tasks/spatial_decomposition/dataset_simulator/script.py +++ b/src/tasks/spatial_decomposition/dataset_simulator/script.py @@ -23,9 +23,11 @@ ## VIASH END CELLTYPE_MIN_CELLS = 25 + # Reading input dataset adata = ad.read_h5ad(par['input']) + def generate_synthetic_dataset( adata: ad.AnnData, alpha: Union[float, Sequence] = 1.0, @@ -162,12 +164,14 @@ def generate_synthetic_dataset( adata_merged.uns["cell_type_names"] = uni_labs return adata_merged + def filter_celltypes(adata, min_cells=CELLTYPE_MIN_CELLS): """Filter rare celltypes from an AnnData""" celltype_counts = adata.obs["cell_type"].value_counts() >= min_cells keep_cells = np.isin(adata.obs["cell_type"], celltype_counts.index[celltype_counts]) return adata[adata.obs.index[keep_cells]].copy() + def filter_genes_cells(adata): """Remove empty cells and genes.""" if "var_names_all" not in adata.uns: @@ -176,6 +180,7 @@ def filter_genes_cells(adata): sc.pp.filter_genes(adata, min_cells=1) sc.pp.filter_cells(adata, min_counts=2) + adata.X = adata.layers["counts"] sc.pp.filter_genes(adata, min_counts=10) adata_merged = generate_synthetic_dataset(adata, @@ -189,8 +194,15 @@ def filter_genes_cells(adata): adata_merged.uns["spatial_data_summary"] = f"Dirichlet alpha={par['alpha']}" filter_genes_cells(adata_merged) adata_merged.X = None -if "is_primary_data" 
in adata_merged.obs: - adata_merged.obs['is_primary_data'] = adata_merged.obs['is_primary_data'].fillna(False) + +# Convert non-string objects to categoricals to avoid +# TypeError: Can't implicitly convert non-string objects to strings +# In this case, the error is raised when there are NA values in .obs columns with dtype object (boolean). +# The resulting anndata object cannot be written to a file. +# This conversion is handled in later versions of anndata (0.10) +for col in adata_merged.obs: + if adata_merged.obs[col].dtype == 'object': + adata_merged.obs[col] = adata_merged.obs[col].astype('category') print("Writing output to file") adata_merged.write_h5ad(par["simulated_data"])