updated to save both filtered and raw calls

AndersenLab · Sep 23, 2024 · d5d29f9 · d5d29f9
1 parent afdacd0
commit d5d29f9
Show file tree

Hide file tree

Showing 3 changed files with 55 additions and 14 deletions.
diff --git a/README.md b/README.md
@@ -29,6 +29,8 @@ nextflow main.nf -profile rockfish --sample_sheet=isotype_groups.tsv --species=c
     --debug              Set to 'true' to test                                    (optional)
     --sample_sheet       TSV with column isotype (needs header)                   (required)
     --masking            BED file containing regions to skip during indel calling (optional)
+    --minsize            The minimum size in bp to report for INDELs              (optional, default: 50bp)
+    --maxsize            The maximum size in bp to report for INDELs              (optional, default: 1000bp)
     --output             Output folder name (optional)                            (optional)
 
     --species            Species: 'c_elegans', 'c_tropicalis' or 'c_briggsae'     (required/optional)
@@ -122,6 +124,14 @@ Path to the reference strain fasta file.
 
 Path to bed file containing regions to be skipped during INDEL calling. For C. elegans this defaults to HVR calls in test_data/c_elegans_mask.bed.
 
+## --minsize (optional)
+
+The minimum size cutoff for reporting an insertion or deletion (default: 50bp)
+
+## --maxsize (optional)
+
+The maximum size cutoff for reporting an insertion or deletion (default: 1000bp)
+
 ## --output (optional)
 
 __default__ - `delly-YYYYMMDD`
@@ -132,10 +142,14 @@ A directory in which to output results. If you have set `--debug`, the default o
 
 ```
 └── ANNOTATE_VCF
-    ├── AB1.vcf.gz
-    ├── AB1.vcf.gz.tbi
-    ├── MY23.vcf.gz
-    └── MY23.vcf.gz.tbi
+    ├── AB1_indels_filtered.vcf.gz
+    ├── AB1_indels_filtered.vcf.gz.tbi
+    ├── AB1_indels_unfiltered.vcf.gz
+    ├── AB1_indels_unfiltered.vcf.gz.tbi
+    ├── MY23_indels_filtered.vcf.gz
+    └── MY23_indels_filtered.vcf.gz.tbi
+    ├── MY23_indels_unfiltered.vcf.gz
+    └── MY23_indels_unfiltered.vcf.gz.tbi
 
 ```
 

diff --git a/main.nf b/main.nf
@@ -116,6 +116,8 @@ nextflow main.nf -profile rockfish --sample_sheet=isotype_groups.tsv --species=c
     --debug              Set to 'true' to test                                    ${params.debug}
     --sample_sheet       TSV with column isotype (needs header)                   ${params.sample_sheet}
     --masking            BED file containing regions to skip during indel calling ${params.masking}
+    --minsize            The minimum size in bp to report for INDELs              ${params.minsize}
+    --maxsize            The maximum size in bp to report for INDELs              ${params.maxsize}
     --output             Output folder name (optional)                            ${params.outdir}
     
     --species            Species: 'c_elegans', 'c_tropicalis' or 'c_briggsae'     ${params.species}
@@ -146,14 +148,19 @@ workflow {
     bams = Channel.fromPath("${params.sample_sheet}")
         .combine(Channel.of("${params.refstrain}")) | get_isotypes
 
-    bams.splitText()
+    delly_input = bams.splitText()
         .map { it.replace("\n", "") }
         .map { ["${it}", "${params.refstrain}", file("${params.bam_folder}/${it}.bam"), file("${params.bam_folder}/${it}.bam.bai"), file("${params.bam_folder}/${params.refstrain}.bam"), file("${params.bam_folder}/${params.refstrain}.bam.bai")] }
         .combine(Channel.fromPath(params.genome))
         .combine(Channel.fromPath(params.genome_index))
-        .combine(Channel.fromPath(params.mask_file)) | delly_call_indel
+        .combine(Channel.fromPath(params.mask_file))
+
+    delly_call_indel( delly_input )
 
-    delly_call_indel.out | convert_to_vcf
+    delly_input
+        .join(delly_call_indel.out) | delly_filter_indel
+
+    delly_filter_indel.out | convert_to_vcf
 }
 
 process get_isotypes {
@@ -187,13 +194,29 @@ process delly_call_indel {
         tuple val(sample), val(control), file(bam), file(bam_index), file(ref_bam), file(ref_bam_index), file(genome), file(genome_index), file(mask)
 
     output:
-        tuple val(sample), file("${sample}.bcf")
+        tuple val(sample), file("${sample}_indels_unfiltered.bcf"), file("${sample}_indels_unfiltered.bcf.csi")
 
     """
     if [[ -e ${mask} ]]; then MASK="-x ${mask}"; else MASK=""; fi
+    delly call -q 20 -g ${genome} -o ${sample}_indels_unfiltered.bcf \$MASK ${bam} ${ref_bam}
+    """
+}
+
+
+process delly_filter_indel {
+
+    label 'sm'
+    label "delly"
+
+    input:
+        tuple val(sample), val(control), file(bam), file(bam_index), file(ref_bam), file(ref_bam_index), file(genome), file(genome_index), file(mask), file(indels_bcf), file(indels_bcf_index)
+
+    output:
+        tuple val(sample), file("${indels_bcf}"), file("${indels_bcf_index}"), file("${sample}_indels_filtered.bcf"), file("${sample}_indels_filtered.bcf.csi")
+
+    """
     echo -e "${control}\tcontrol\n${sample}\ttumor" > samples.tsv
-    delly call -q 20 -g ${genome} -o sample.bcf \$MASK ${bam} ${ref_bam}
-    delly filter -f somatic -o ${sample}.bcf -a 0.75 -p -m 50 -n 1000 -s samples.tsv sample.bcf
+    delly filter -f somatic -o ${sample}_indels_filtered.bcf -a 0.75 -p -m ${params.minsize} -n ${params.maxsize} -s samples.tsv ${indels_bcf}
     """
 }
 
@@ -205,14 +228,16 @@ process convert_to_vcf {
     publishDir "${params.outdir}/", mode: 'copy'
 
     input:
-        tuple val(sample), file(bcf)
+        tuple val(sample), file(unfiltered_bcf), file(unfiltered_bcf_index), file(filtered_bcf), file(filtered_bcf_index)
 
     output:
-        path("${sample}.vcf.gz*")
+        path("${sample}_*filtered_indels.vcf.gz*")
 
     """
-    bcftools view -v indels -Oz5 -o ${sample}.vcf.gz ${bcf}
-    tabix -p vcf ${sample}.vcf.gz
+    bcftools view -v indels -Oz5 -o ${sample}_unfiltered_indels.vcf.gz ${unfiltered_bcf}
+    tabix -p vcf ${sample}_unfiltered_indels.vcf.gz
+    bcftools view -v indels -Oz5 -o ${sample}_filtered_indels.vcf.gz ${filtered_bcf}
+    tabix -p vcf ${sample}_filtered_indels.vcf.gz
     """
 }
 

diff --git a/nextflow.config b/nextflow.config
@@ -14,6 +14,8 @@ params {
     bam_dir = null
     output = null
     masking = null
+    minsize = 50
+    maxsize = 1000
 }
 
 profiles {