From 215d85fba7b28fa12ea6d6818d5ac4db0d6c818f Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Mon, 4 Mar 2024 20:36:27 +0100 Subject: [PATCH] Fix problems with small batches during doublet detection --- modules/dedoublet_adata.nf | 2 +- modules/solo.nf | 19 +++++++------------ 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/modules/dedoublet_adata.nf b/modules/dedoublet_adata.nf index 5202996..dfecfcf 100644 --- a/modules/dedoublet_adata.nf +++ b/modules/dedoublet_adata.nf @@ -25,7 +25,7 @@ process DEDOUBLET_ADATA { solo_annotation = solo_annotation.reindex(adata.obs_names) # Keep only cells with "singlet" in the "doublet_label" column - adata = adata[solo_annotation["doublet_label"] == "singlet", :] + adata = adata[~solo_annotation["doublet_label"] == "doublet", :] # Save the AnnData object adata.write_h5ad("${meta.id}.dedup.h5ad") diff --git a/modules/solo.nf b/modules/solo.nf index 5a50852..9e085ee 100644 --- a/modules/solo.nf +++ b/modules/solo.nf @@ -28,14 +28,6 @@ process SOLO { adata = sc.read_h5ad("${adata}") adata_batch = adata[adata.obs.batch == "${batch}"] - batch_size = len(adata_batch) - - if batch_size < 128: - print(f"Batch size too small ({batch_size}), skipping SOLO") - solo_res = pd.DataFrame(index=adata_batch.obs.index) - solo_res["doublet_label"] = "Unknown" - solo_res.to_pickle("${new_meta.id}.solo.pkl") - exit(0) if ${has_celltypes ? "True" : "False"}: scvi.model.SCANVI.setup_anndata(adata, batch_key="batch", labels_key="cell_type", unlabeled_category="Unknown") @@ -46,7 +38,7 @@ process SOLO { solo = scvi.external.SOLO.from_scvi_model(scvi_model, restrict_to_batch="${batch}") - minibatch_size = 128 + minibatch_size = min(128, len(adata_batch)) worked = False while not worked and minibatch_size > 100: try: @@ -55,10 +47,13 @@ process SOLO { except ValueError: print("Minibatch size did not work, trying again with smaller minibatch size") minibatch_size -= 1 - pass - solo_res = solo.predict() - solo_res["doublet_label"] = solo.predict(False) + if worked: + solo_res = solo.predict() + solo_res["doublet_label"] = solo.predict(False) + else: + solo_res = pd.DataFrame(index=adata_batch.obs.index) + solo_res["doublet_label"] = "Unknown" solo_res.to_pickle("${new_meta.id}.solo.pkl") """