Skip to content

Commit

Permalink
Germline CNV WDLs for WGS (#6607)
Browse files Browse the repository at this point in the history
  • Loading branch information
mwalker174 authored Aug 11, 2020
1 parent ef71145 commit b1688d9
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 14 deletions.
73 changes: 72 additions & 1 deletion scripts/cnv_wdl/cnv_common_tasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ task CollectCounts {
File ref_fasta
File ref_fasta_fai
File ref_fasta_dict
Array[String]? disabled_read_filters
Boolean? enable_indexing
String? format
File? gatk4_jar_override
Expand All @@ -201,11 +202,22 @@ task CollectCounts {
Int? preemptible_attempts
}

parameter_meta {
bam: {
localization_optional: true
}
bam_idx: {
localization_optional: true
}
}

Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
Int command_mem_mb = machine_mem_mb - 1000

Boolean enable_indexing_ = select_first([enable_indexing, false])

Array[String] disabled_read_filters_arr = if defined(disabled_read_filters) then prefix("--disable-read-filter ", select_first([disabled_read_filters])) else []

# Sample name is derived from the bam filename
String base_filename = basename(bam, ".bam")
String format_ = select_first([format, "HDF5"])
Expand Down Expand Up @@ -257,7 +269,8 @@ task CollectCounts {
--reference ~{ref_fasta} \
--format ~{default="HDF5" hdf5_or_tsv_or_null_format} \
--interval-merging-rule OVERLAPPING_ONLY \
--output ~{counts_filename_for_collect_read_counts}
--output ~{counts_filename_for_collect_read_counts} \
~{sep=' ' disabled_read_filters_arr}

if [ ~{do_block_compression} = "true" ]; then
bgzip ~{counts_filename_for_collect_read_counts}
Expand Down Expand Up @@ -303,6 +316,15 @@ task CollectAllelicCounts {
Int? preemptible_attempts
}

parameter_meta {
bam: {
localization_optional: true
}
bam_idx: {
localization_optional: true
}
}

Int machine_mem_mb = select_first([mem_gb, 13]) * 1000
Int command_mem_mb = machine_mem_mb - 1000

Expand Down Expand Up @@ -605,3 +627,52 @@ task CollectModelQualityMetrics {
String qc_status_string = read_string("qcStatus.txt")
}
}

# Splits the combined contig-ploidy calls tarball (as produced by
# DetermineGermlineContigPloidy, which contains one SAMPLE_<i> directory
# per input sample) into one tar.gz per sample.
task ScatterPloidyCallsBySample {
    input {
        File contig_ploidy_calls_tar   # combined calls tar.gz with SAMPLE_<i> subdirectories
        Array[String] samples          # sample ids, in the same order as the SAMPLE_<i> indices

        # Runtime parameters
        String docker
        Int? mem_gb
        Int? disk_space_gb
        Boolean use_ssd = false
        Int? cpu
        Int? preemptible_attempts
    }

    Int num_samples = length(samples)

    command <<<
        set -eu

        # Extract the combined ploidy calls
        mkdir calls
        tar xzf ~{contig_ploidy_calls_tar} -C calls/

        # Re-archive the call files by sample.  Filenames carry a zero-padded
        # sample-index prefix so the glob in the output section returns the
        # archives in the original sample order.
        sample_ids=(~{sep=" " samples})
        num_samples=~{num_samples}
        # Pad width = number of digits in the sample count (e.g. 100 samples -> 3)
        num_digits=${#num_samples}
        for (( i=0; i<~{num_samples}; i++ ))
        do
            # Quote expansions so sample ids with unusual characters do not
            # undergo word splitting or glob expansion.
            sample_id="${sample_ids[$i]}"
            padded_sample_index=$(printf "%0${num_digits}d" "$i")
            tar -czf "sample_${padded_sample_index}.${sample_id}.contig_ploidy_calls.tar.gz" -C "calls/SAMPLE_${i}" .
        done
    >>>

    runtime {
        docker: docker
        memory: select_first([mem_gb, 2]) + " GiB"
        disks: "local-disk " + select_first([disk_space_gb, 10]) + if use_ssd then " SSD" else " HDD"
        cpu: select_first([cpu, 1])
        preemptible: select_first([preemptible_attempts, 5])
    }

    output {
        # One tar.gz per input sample, sorted by the zero-padded index prefix
        Array[File] sample_contig_ploidy_calls_tar = glob("sample_*.contig_ploidy_calls.tar.gz")
    }
}
18 changes: 10 additions & 8 deletions scripts/cnv_wdl/germline/cnv_germline_case_scattered_workflow.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ workflow CNVGermlineCaseScatteredWorkflow {
##############################################
#### optional arguments for CollectCounts ####
##############################################
Array[String]? disabled_read_filters_for_collect_counts
String? collect_counts_format
Boolean? collect_counts_enable_indexing
Int? mem_gb_for_collect_counts
Expand Down Expand Up @@ -149,6 +150,7 @@ workflow CNVGermlineCaseScatteredWorkflow {
preemptible_attempts = preemptible_attempts,
padding = padding,
bin_length = bin_length,
disabled_read_filters_for_collect_counts = disabled_read_filters_for_collect_counts,
collect_counts_format = collect_counts_format,
collect_counts_enable_indexing = collect_counts_enable_indexing,
mem_gb_for_collect_counts = mem_gb_for_collect_counts,
Expand Down Expand Up @@ -196,16 +198,16 @@ workflow CNVGermlineCaseScatteredWorkflow {

output {
Array[File] preprocessed_intervals = CNVGermlineCaseWorkflow.preprocessed_intervals
Array[Array[File]] read_counts_entity_id = CNVGermlineCaseWorkflow.read_counts_entity_id
Array[Array[File]] read_counts = CNVGermlineCaseWorkflow.read_counts
Array[File] contig_ploidy_calls_tars = CNVGermlineCaseWorkflow.contig_ploidy_calls_tar
Array[File] read_counts_entity_id = flatten(CNVGermlineCaseWorkflow.read_counts_entity_id)
Array[File] read_counts = flatten(CNVGermlineCaseWorkflow.read_counts)
Array[File] sample_contig_ploidy_calls_tars = flatten(CNVGermlineCaseWorkflow.sample_contig_ploidy_calls_tars)
Array[Array[Array[File]]] gcnv_calls_tars = CNVGermlineCaseWorkflow.gcnv_calls_tars
Array[Array[File]] gcnv_tracking_tars = CNVGermlineCaseWorkflow.gcnv_tracking_tars
Array[Array[File]] genotyped_intervals_vcf = CNVGermlineCaseWorkflow.genotyped_intervals_vcf
Array[Array[File]] genotyped_segments_vcf = CNVGermlineCaseWorkflow.genotyped_segments_vcf
Array[Array[File]] qc_status_files = CNVGermlineCaseWorkflow.qc_status_files
Array[Array[String]] qc_status_strings = CNVGermlineCaseWorkflow.qc_status_strings
Array[Array[File]] denoised_copy_ratios = CNVGermlineCaseWorkflow.denoised_copy_ratios
Array[File] genotyped_intervals_vcf = flatten(CNVGermlineCaseWorkflow.genotyped_intervals_vcf)
Array[File] genotyped_segments_vcf = flatten(CNVGermlineCaseWorkflow.genotyped_segments_vcf)
Array[File] denoised_copy_ratios = flatten(CNVGermlineCaseWorkflow.denoised_copy_ratios)
Array[File] qc_status_files = flatten(CNVGermlineCaseWorkflow.qc_status_files)
Array[String] qc_status_strings = flatten(CNVGermlineCaseWorkflow.qc_status_strings)
}
}

Expand Down
18 changes: 15 additions & 3 deletions scripts/cnv_wdl/germline/cnv_germline_case_workflow.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ workflow CNVGermlineCaseWorkflow {
##############################################
#### optional arguments for CollectCounts ####
##############################################
Array[String]? disabled_read_filters_for_collect_counts
String? collect_counts_format
Boolean? collect_counts_enable_indexing
Int? mem_gb_for_collect_counts
Expand Down Expand Up @@ -116,6 +117,8 @@ workflow CNVGermlineCaseWorkflow {
###################################################
Int ref_copy_number_autosomal_contigs
Array[String]? allosomal_contigs
Int? disk_space_gb_for_postprocess_germline_cnv_calls
Int? mem_gb_for_postprocess_germline_cnv_calls

##########################
#### arguments for QC ####
Expand Down Expand Up @@ -150,6 +153,7 @@ workflow CNVGermlineCaseWorkflow {
ref_fasta_dict = ref_fasta_dict,
format = collect_counts_format,
enable_indexing = collect_counts_enable_indexing,
disabled_read_filters = disabled_read_filters_for_collect_counts,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
mem_gb = mem_gb_for_collect_counts,
Expand Down Expand Up @@ -253,18 +257,26 @@ workflow CNVGermlineCaseWorkflow {
}
}

call CNVTasks.ScatterPloidyCallsBySample {
input :
contig_ploidy_calls_tar = DetermineGermlineContigPloidyCaseMode.contig_ploidy_calls_tar,
samples = CollectCounts.entity_id,
docker = gatk_docker,
preemptible_attempts = preemptible_attempts
}
output {
File preprocessed_intervals = PreprocessIntervals.preprocessed_intervals
Array[File] read_counts_entity_id = CollectCounts.entity_id
Array[File] read_counts = CollectCounts.counts
File contig_ploidy_calls_tar = DetermineGermlineContigPloidyCaseMode.contig_ploidy_calls_tar
Array[File] sample_contig_ploidy_calls_tars = ScatterPloidyCallsBySample.sample_contig_ploidy_calls_tar
Array[Array[File]] gcnv_calls_tars = GermlineCNVCallerCaseMode.gcnv_call_tars
Array[File] gcnv_tracking_tars = GermlineCNVCallerCaseMode.gcnv_tracking_tar
Array[File] genotyped_intervals_vcf = PostprocessGermlineCNVCalls.genotyped_intervals_vcf
Array[File] genotyped_segments_vcf = PostprocessGermlineCNVCalls.genotyped_segments_vcf
Array[File] denoised_copy_ratios = PostprocessGermlineCNVCalls.denoised_copy_ratios
Array[File] qc_status_files = CollectSampleQualityMetrics.qc_status_file
Array[String] qc_status_strings = CollectSampleQualityMetrics.qc_status_string
Array[File] denoised_copy_ratios = PostprocessGermlineCNVCalls.denoised_copy_ratios
}
}

Expand Down Expand Up @@ -314,7 +326,7 @@ task DetermineGermlineContigPloidyCaseMode {
--mapping-error-rate ~{default="0.01" mapping_error_rate} \
--sample-psi-scale ~{default="0.0001" sample_psi_scale}

tar czf case-contig-ploidy-calls.tar.gz -C ~{output_dir_}/case-calls .
tar c -C ~{output_dir_}/case-calls . | gzip -1 > case-contig-ploidy-calls.tar.gz

rm -rf contig-ploidy-model
>>>
Expand Down
16 changes: 14 additions & 2 deletions scripts/cnv_wdl/germline/cnv_germline_cohort_workflow.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ workflow CNVGermlineCohortWorkflow {
##############################################
#### optional arguments for CollectCounts ####
##############################################
Array[String]? disabled_read_filters_for_collect_counts
String? collect_counts_format
Boolean? collect_counts_enable_indexing
Int? mem_gb_for_collect_counts
Expand Down Expand Up @@ -152,6 +153,8 @@ workflow CNVGermlineCohortWorkflow {
#### arguments for PostprocessGermlineCNVCalls ####
###################################################
Int ref_copy_number_autosomal_contigs
Int? mem_gb_for_postprocess_germline_cnv_calls
Int? disk_space_gb_for_postprocess_germline_cnv_calls
Array[String]? allosomal_contigs

##########################
Expand Down Expand Up @@ -206,6 +209,7 @@ workflow CNVGermlineCohortWorkflow {
ref_fasta_dict = ref_fasta_dict,
format = collect_counts_format,
enable_indexing = collect_counts_enable_indexing,
disabled_read_filters = disabled_read_filters_for_collect_counts,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
mem_gb = mem_gb_for_collect_counts,
Expand Down Expand Up @@ -353,24 +357,32 @@ workflow CNVGermlineCohortWorkflow {
preemptible_attempts = preemptible_attempts
}

call CNVTasks.ScatterPloidyCallsBySample {
input :
contig_ploidy_calls_tar = DetermineGermlineContigPloidyCohortMode.contig_ploidy_calls_tar,
samples = CollectCounts.entity_id,
docker = gatk_docker,
preemptible_attempts = preemptible_attempts
}
output {
File preprocessed_intervals = PreprocessIntervals.preprocessed_intervals
Array[File] read_counts_entity_ids = CollectCounts.entity_id
Array[File] read_counts = CollectCounts.counts
File? annotated_intervals = AnnotateIntervals.annotated_intervals
File filtered_intervals = FilterIntervals.filtered_intervals
File contig_ploidy_model_tar = DetermineGermlineContigPloidyCohortMode.contig_ploidy_model_tar
File contig_ploidy_calls_tar = DetermineGermlineContigPloidyCohortMode.contig_ploidy_calls_tar
Array[File] sample_contig_ploidy_calls_tars = ScatterPloidyCallsBySample.sample_contig_ploidy_calls_tar
Array[File] gcnv_model_tars = GermlineCNVCallerCohortMode.gcnv_model_tar
Array[Array[File]] gcnv_calls_tars = GermlineCNVCallerCohortMode.gcnv_call_tars
Array[File] gcnv_tracking_tars = GermlineCNVCallerCohortMode.gcnv_tracking_tar
Array[File] genotyped_intervals_vcfs = PostprocessGermlineCNVCalls.genotyped_intervals_vcf
Array[File] genotyped_segments_vcfs = PostprocessGermlineCNVCalls.genotyped_segments_vcf
Array[File] denoised_copy_ratios = PostprocessGermlineCNVCalls.denoised_copy_ratios
Array[File] sample_qc_status_files = CollectSampleQualityMetrics.qc_status_file
Array[String] sample_qc_status_strings = CollectSampleQualityMetrics.qc_status_string
File model_qc_status_file = CollectModelQualityMetrics.qc_status_file
String model_qc_string = CollectModelQualityMetrics.qc_status_string
Array[File] denoised_copy_ratios = PostprocessGermlineCNVCalls.denoised_copy_ratios
}
}

Expand Down

0 comments on commit b1688d9

Please sign in to comment.