Skip to content

Commit

Permalink
updated to save both filtered and raw calls
Browse files Browse the repository at this point in the history
  • Loading branch information
msauria committed Sep 23, 2024
1 parent afdacd0 commit d5d29f9
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 14 deletions.
22 changes: 18 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ nextflow main.nf -profile rockfish --sample_sheet=isotype_groups.tsv --species=c
--debug Set to 'true' to test (optional)
--sample_sheet TSV with column isotype (needs header) (required)
--masking BED file containing regions to skip during indel calling (optional)
--minsize The minimum size in bp to report for INDELs (optional, default: 50bp)
--maxsize The maximum size in bp to report for INDELs (optional, default: 1000bp)
--output Output folder name (optional) (optional)

--species Species: 'c_elegans', 'c_tropicalis' or 'c_briggsae' (required/optional)
Expand Down Expand Up @@ -122,6 +124,14 @@ Path to the reference strain fasta file.

Path to bed file containing regions to be skipped during INDEL calling. For C. elegans this defaults to HVR calls in test_data/c_elegans_mask.bed.

## --minsize (optional)

The minimum size cutoff for reporting an insertion or deletion (default: 50bp)

## --maxsize (optional)

The maximum size cutoff for reporting an insertion or deletion (default: 1000bp)

## --output (optional)

__default__ - `delly-YYYYMMDD`
Expand All @@ -132,10 +142,14 @@ A directory in which to output results. If you have set `--debug`, the default o

```
└── ANNOTATE_VCF
   ├── AB1.vcf.gz
   ├── AB1.vcf.gz.tbi
   ├── MY23.vcf.gz
└── MY23.vcf.gz.tbi
   ├── AB1_indels_filtered.vcf.gz
   ├── AB1_indels_filtered.vcf.gz.tbi
   ├── AB1_indels_unfiltered.vcf.gz
   ├── AB1_indels_unfiltered.vcf.gz.tbi
   ├── MY23_indels_filtered.vcf.gz
└── MY23_indels_filtered.vcf.gz.tbi
   ├── MY23_indels_unfiltered.vcf.gz
└── MY23_indels_unfiltered.vcf.gz.tbi
```

Expand Down
45 changes: 35 additions & 10 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ nextflow main.nf -profile rockfish --sample_sheet=isotype_groups.tsv --species=c
--debug Set to 'true' to test ${params.debug}
--sample_sheet TSV with column isotype (needs header) ${params.sample_sheet}
--masking BED file containing regions to skip during indel calling ${params.masking}
--minsize The minimum size in bp to report for INDELs ${params.minsize}
--maxsize The maximum size in bp to report for INDELs ${params.maxsize}
--output Output folder name (optional) ${params.outdir}
--species Species: 'c_elegans', 'c_tropicalis' or 'c_briggsae' ${params.species}
Expand Down Expand Up @@ -146,14 +148,19 @@ workflow {
bams = Channel.fromPath("${params.sample_sheet}")
.combine(Channel.of("${params.refstrain}")) | get_isotypes

bams.splitText()
delly_input = bams.splitText()
.map { it.replace("\n", "") }
.map { ["${it}", "${params.refstrain}", file("${params.bam_folder}/${it}.bam"), file("${params.bam_folder}/${it}.bam.bai"), file("${params.bam_folder}/${params.refstrain}.bam"), file("${params.bam_folder}/${params.refstrain}.bam.bai")] }
.combine(Channel.fromPath(params.genome))
.combine(Channel.fromPath(params.genome_index))
.combine(Channel.fromPath(params.mask_file)) | delly_call_indel
.combine(Channel.fromPath(params.mask_file))

delly_call_indel( delly_input )

delly_call_indel.out | convert_to_vcf
delly_input
.join(delly_call_indel.out) | delly_filter_indel

delly_filter_indel.out | convert_to_vcf
}

process get_isotypes {
Expand Down Expand Up @@ -187,13 +194,29 @@ process delly_call_indel {
tuple val(sample), val(control), file(bam), file(bam_index), file(ref_bam), file(ref_bam_index), file(genome), file(genome_index), file(mask)

output:
tuple val(sample), file("${sample}.bcf")
tuple val(sample), file("${sample}_indels_unfiltered.bcf"), file("${sample}_indels_unfiltered.bcf.csi")

"""
if [[ -e ${mask} ]]; then MASK="-x ${mask}"; else MASK=""; fi
delly call -q 20 -g ${genome} -o ${sample}_indels_unfiltered.bcf \$MASK ${bam} ${ref_bam}
"""
}


process delly_filter_indel {

label 'sm'
label "delly"

input:
tuple val(sample), val(control), file(bam), file(bam_index), file(ref_bam), file(ref_bam_index), file(genome), file(genome_index), file(mask), file(indels_bcf), file(indels_bcf_index)

output:
tuple val(sample), file("${indels_bcf}"), file("${indels_bcf_index}"), file("${sample}_indels_filtered.bcf"), file("${sample}_indels_filtered.bcf.csi")

"""
echo -e "${control}\tcontrol\n${sample}\ttumor" > samples.tsv
delly call -q 20 -g ${genome} -o sample.bcf \$MASK ${bam} ${ref_bam}
delly filter -f somatic -o ${sample}.bcf -a 0.75 -p -m 50 -n 1000 -s samples.tsv sample.bcf
delly filter -f somatic -o ${sample}_indels_filtered.bcf -a 0.75 -p -m ${params.minsize} -n ${params.maxsize} -s samples.tsv ${indels_bcf}
"""
}

Expand All @@ -205,14 +228,16 @@ process convert_to_vcf {
publishDir "${params.outdir}/", mode: 'copy'

input:
tuple val(sample), file(bcf)
tuple val(sample), file(unfiltered_bcf), file(unfiltered_bcf_index), file(filtered_bcf), file(filtered_bcf_index)

output:
path("${sample}.vcf.gz*")
path("${sample}_*filtered_indels.vcf.gz*")

"""
bcftools view -v indels -Oz5 -o ${sample}.vcf.gz ${bcf}
tabix -p vcf ${sample}.vcf.gz
bcftools view -v indels -Oz5 -o ${sample}_unfiltered_indels.vcf.gz ${unfiltered_bcf}
tabix -p vcf ${sample}_unfiltered_indels.vcf.gz
bcftools view -v indels -Oz5 -o ${sample}_filtered_indels.vcf.gz ${filtered_bcf}
tabix -p vcf ${sample}_filtered_indels.vcf.gz
"""
}

Expand Down
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ params {
bam_dir = null
output = null
masking = null
minsize = 50
maxsize = 1000
}

profiles {
Expand Down

0 comments on commit d5d29f9

Please sign in to comment.