Merge pull request #237 from vib-singlecell-nf/feature/229-support_fo…

…r_soupx Feature/229 support for soupx
vib-singlecell-nf · Oct 19, 2020 · bd44086 · bd44086
2 parents cb0ba62 + aa639cc
commit bd44086
Show file tree

Hide file tree

Showing 5 changed files with 147 additions and 3 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -55,3 +55,7 @@
 	path = src/celda
 	url = https://github.com/vib-singlecell-nf/celda.git
 	branch = develop
+[submodule "src/soupx"]
+	path = src/soupx
+	url = git@github.com:vib-singlecell-nf/soupx.git
+	branch = develop
diff --git a/main.nf b/main.nf
@@ -433,6 +433,116 @@ workflow single_sample_decontx_scrublet {
     }
 }
 
+workflow soupx {
+
+    // Standalone entry point: runs only the workflow exported by ./src/soupx/main.
+    // The include alias is SOUPX (not SOUPX__DECONTX) so it matches the alias used
+    // by the other SoupX workflows in this file and does not suggest DecontX.
+    include {
+        soupx as SOUPX;
+    } from "./src/soupx/main" params(params)
+    // Run SoupX on the data
+    SOUPX()
+
+}
+
+workflow single_sample_soupx {
+
+    // Runs the Scanpy single-sample workflow and the SoupX workflow, converts the
+    // Scanpy filtered data to loom and, when a publish config is present, publishes it.
+    include {
+        SINGLE_SAMPLE as SCANPY__SINGLE_SAMPLE;
+    } from './src/scanpy/workflows/single_sample' params(params)
+    include {
+        soupx as SOUPX;
+    } from "./src/soupx/main" params(params)
+    include {
+        ANNOTATE_BY_CELL_METADATA_BY_PAIR;
+    } from './src/utils/workflows/annotateByCellMetadata.nf' params(params)
+    include {
+        PUBLISH;
+    } from './src/utils/workflows/utils.nf' params(params)
+    include {
+        SC__H5AD_TO_LOOM;
+    } from './src/utils/processes/h5adToLoom.nf' params(params)
+
+    // NOTE(review): getDataChannel and SC__FILE_CONVERTER are not included in this
+    // block; presumably they are in scope from elsewhere in main.nf — confirm.
+    data = getDataChannel \
+        | SC__FILE_CONVERTER
+    // Run Single-sample pipeline on the data
+    SCANPY__SINGLE_SAMPLE( data )
+    // Run SoupX on the data
+    SOUPX()
+
+    // Keep only the first two elements of each filtered_data tuple for the loom conversion
+    SC__H5AD_TO_LOOM(
+        SCANPY__SINGLE_SAMPLE.out.filtered_data.map {
+            it -> tuple(it[0], it[1])
+        }
+    )
+    if(params.utils.containsKey("publish")) {
+        PUBLISH(
+            SC__H5AD_TO_LOOM.out,
+            // Fixed SoupX tag: the previous value was built from
+            // params.sc.celda.decontx.strategy, which is celda/DecontX configuration
+            // and is not loaded by the SoupX profiles. The tag mirrors the one used
+            // by single_sample_soupx_scrublet.
+            "SINGLE_SAMPLE_SOUPX_CORRECT",
+            "loom",
+            null,
+            false
+        )
+    }
+
+}
+
+workflow single_sample_soupx_scrublet {
+    include {
+        SINGLE_SAMPLE as SCANPY__SINGLE_SAMPLE;
+    } from './src/scanpy/workflows/single_sample' params(params)
+    include {
+        soupx as SOUPX;
+    } from "./src/soupx/main" params(params)
+    include {
+        DOUBLET_REMOVAL as SCRUBLET__DOUBLET_REMOVAL;
+    } from "./src/scrublet/workflows/doublet_removal" params(params)
+    include {
+        ANNOTATE_BY_CELL_METADATA_BY_PAIR;
+    } from './src/utils/workflows/annotateByCellMetadata.nf' params(params)
+    include {
+        PUBLISH;
+    } from './src/utils/workflows/utils.nf' params(params)
+    include {
+        SC__H5AD_TO_LOOM;
+    } from './src/utils/processes/h5adToLoom.nf' params(params)
+
+    data = getDataChannel \
+        | SC__FILE_CONVERTER
+    // Run Single-sample pipeline on the data
+    SCANPY__SINGLE_SAMPLE( data )
+    // Run SoupX on the data
+    SOUPX()
+    // Run Scrublet on the SoupX processed data, joined with the Scanpy PCA output
+    SCRUBLET__DOUBLET_REMOVAL(
+        SOUPX.out.soupx_processed.join( SCANPY__SINGLE_SAMPLE.out.dr_pca_data ),
+        SCANPY__SINGLE_SAMPLE.out.final_processed_data
+    )
+
+    // Annotate the final processed file with doublet information inferred from Scrublet
+    ANNOTATE_BY_CELL_METADATA_BY_PAIR(
+        SCANPY__SINGLE_SAMPLE.out.final_processed_data,
+        SCRUBLET__DOUBLET_REMOVAL.out.doublet_detection.map {
+            it -> tuple(it[0], it[1])
+        },
+        "scrublet"
+    )
+    SC__H5AD_TO_LOOM(
+        SCANPY__SINGLE_SAMPLE.out.filtered_data.map {
+            it -> tuple(it[0], it[1])
+        }.join(
+            ANNOTATE_BY_CELL_METADATA_BY_PAIR.out
+        )
+    )
+    if(params.utils.containsKey("publish")) {
+        PUBLISH(
+            SC__H5AD_TO_LOOM.out,
+            "SINGLE_SAMPLE_SOUPX_CORRECT_SCRUBLET",
+            "loom",
+            null,
+            false
+        )
+    }
+}
+
 // run single_sample, then scenic from the previous input (not standalone):
 workflow pipe_single_sample_scenic {
 

diff --git a/nextflow.config b/nextflow.config
@@ -141,6 +141,13 @@ profiles {
         includeConfig 'src/celda/celda.config'
         includeConfig 'src/celda/conf/decontx_correct.config'
     }
+    single_sample_soupx_correct_scrublet {
+        includeConfig 'src/scanpy/scanpy.config'
+        includeConfig 'src/scrublet/scrublet.config'
+        includeConfig 'src/utils/conf/scope.config'
+        includeConfig 'src/soupx/soupx.config'
+        includeConfig 'src/soupx/conf/soupx_correct.config'
+    }
     scenic {
         includeConfig 'src/scenic/scenic.config'
         includeConfig 'src/utils/conf/scope.config'
@@ -218,6 +225,13 @@ profiles {
         includeConfig 'src/celda/conf/decontx_filter.config'
     }
 
+    // soupx profiles: load the SoupX base config and its soupx_correct config
+
+    soupx {
+        includeConfig 'src/soupx/soupx.config'
+        includeConfig 'src/soupx/conf/soupx_correct.config'
+    }
+
     // directs profiles:
 
     directs {

diff --git a/src/soupx b/src/soupx
diff --git a/src/utils/bin/sc_file_converter.R b/src/utils/bin/sc_file_converter.R
@@ -186,12 +186,24 @@ if(INPUT_FORMAT == 'seurat_rds' & OUTPUT_FORMAT == 'h5ad') {
     if(class(x = sce) != "SingleCellExperiment") {
       	stop("VSN ERROR: The object contained in the Rds file is not a SingleCellExperiment object.")
     }
+    # Set/update row.names with gene symbols
+    row_data <- SummarizedExperiment::rowData(x = sce)
+    if("Symbol" %in% colnames(x = row_data)) {
+	    row.names(x = sce) <- row_data$Symbol
+    }
+    # Tag cell with sample ID: when requested, swap a trailing "-<digits>" in each
+    # column name of sce for "-<sample_id>".
+    # NOTE(review): isTrue is not base R (base provides isTRUE); presumably it is a
+    # helper defined elsewhere in this script — confirm before changing it.
+    if(isTrue(x = args$`tag_cell_with_sample_id`)) {
+		# Spell out `replacement` instead of relying on partial matching of `replace`
+		new.names <- gsub(
+			pattern = "-([0-9]+)$",
+			replacement = paste0("-", args$`sample_id`),
+			x = colnames(x = sce)
+		)
+		colnames(x = sce) <- new.names
+    }
     # Add sample ID as colData entry
 	col_data <- SummarizedExperiment::colData(x = sce)
     col_data$sample_id <- args$`sample_id`
     SummarizedExperiment::colData(x = sce) <- col_data
-	# Update row.names with gene symbols
-	row.names(x = sce) <- SummarizedExperiment::rowData(x = sce)$Symbol
     # Sort genes
     sce <- sce[sort(x = row.names(x = sce)),]
     sceasy::convertFormat(
@@ -207,7 +219,10 @@ if(INPUT_FORMAT == 'seurat_rds' & OUTPUT_FORMAT == 'h5ad') {
       	samples = FILE_PATH_IN
     )
     # Set/update row.names with gene symbols
-	row.names(x = sce) <- SummarizedExperiment::rowData(x = sce)$Symbol
+    row_data <- SummarizedExperiment::rowData(x = sce)
+    if("Symbol" %in% colnames(x = row_data)) {
+	    row.names(x = sce) <- row_data$Symbol
+    }
     # Set col.names with barcode ID
     colnames(x = sce) <- SummarizedExperiment::colData(x = sce)$Barcode
     # Tag cell with sample ID