Skip to content

Commit

Permalink
Merge pull request #34 from GeneMAP-Research/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
esohkevin authored Jul 17, 2024
2 parents 2ff1d37 + ef1a211 commit 0462c4a
Show file tree
Hide file tree
Showing 7 changed files with 179 additions and 69 deletions.
46 changes: 15 additions & 31 deletions callVariants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,15 @@ include {
callVariantsFromGenomicsDB;
callVariantsFromExistingGenomicsDB;
collectIntervalsPerChromosome;
concatPerIntervalVcfs;
concatPerChromIntervalVcfs;
concatPerChromosomeVcfs;
getGvcfFiles;
getGvcfList;
getGenomicsdbWorkspaces;
combineGvcfs;
getVcfGenomicIntervals;
getGenomicIntervalList;
getGenomicInterval;
genotypeGvcfs;
updateGenomicsDbPerInterval;
} from "${projectDir}/modules/variantCallingPipeline.nf"
Expand All @@ -59,8 +62,9 @@ workflow {
// NEW IMPORT
//-=-=-=-=-=-=

gvcfList = getGvcfFiles().toList()

gvcfs = getGvcfFiles().toList()
gvcfList = getGvcfList(gvcfs)
genomicInterval = getGenomicInterval(gvcfList)
genomicsDB = createGenomicsDbPerInterval(genomicInterval, gvcfList)

}
Expand All @@ -71,33 +75,16 @@ workflow {
// genomicsdb workspaces must already exist and gvcfs must be provided with interval list
//-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

gvcfList = getGvcfFiles().toList()

if(params.interval == "NULL") {
genomicInterval = getVcfGenomicIntervals(gvcfList).flatten()
}
else {
genomicInterval = getGenomicIntervalList().flatten()
}

gvcfs = getGvcfFiles().toList()
gvcfList = getGvcfList(gvcfs)
genomicInterval = getGenomicInterval(gvcfList)
workspace = getGenomicsdbWorkspaces().map { wrkspc -> tuple(wrkspc.simpleName, wrkspc) }
genomicInterval
.map { interval -> tuple(interval.simpleName + "_${params.output_prefix}-workspace", interval) }
.join(workspace)
.map {workspaceName, interval, workspace -> tuple(workspaceName, interval, workspace)}
.set { workspace_interval }
updateGenomicsDbPerInterval(workspace_interval, gvcfList)

//workspace = getGenomicsdbWorkspaces().map { wrkspc -> tuple(wrkspc.simpleName, wrkspc) }
//genomicInterval
// .map { interval -> tuple(interval.simpleName + "_${params.output_prefix}-workspace", interval) }
// .join(workspace)
// .map {workspaceName, interval, workspace -> tuple(workspaceName, interval, workspace)}
// .set { workspace_interval }
//vcfs = callVariantsFromExistingGenomicsDB(workspace).view().collect()
//vcfs_per_chrom_list = collectIntervalsPerChromosome(vcfs).flatten()
//concatPerIntervalVcfs(vcfs_per_chrom_list).view()

}
else {

Expand All @@ -109,7 +96,8 @@ workflow {
workspace = getGenomicsdbWorkspaces().map { wrkspc -> tuple(wrkspc.simpleName, wrkspc) }
vcfs = callVariantsFromExistingGenomicsDB(workspace).view().collect()
vcfs_per_chrom_list = collectIntervalsPerChromosome(vcfs).flatten()
concatPerIntervalVcfs(vcfs_per_chrom_list).view()
vcfs_per_chrom = concatPerChromIntervalVcfs(vcfs_per_chrom_list).collect().view()
concatPerChromosomeVcfs(vcfs_per_chrom).view()

}
}
Expand Down Expand Up @@ -180,12 +168,7 @@ workflow {
// Using 'GenomicsDBImport' for efficiency
//-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

if(params.interval == "NULL") {
genomicInterval = getVcfGenomicIntervals(gvcfList).flatten()
}
else {
genomicInterval = getGenomicIntervalList().flatten()
}
genomicInterval = getGenomicInterval()

genomicsDB = createGenomicsDbPerInterval(genomicInterval, gvcfList)

Expand All @@ -197,7 +180,8 @@ workflow {
.set { workspace_interval }
vcfs = callVariantsFromGenomicsDB(workspace_interval).collect()
vcfs_per_chrom_list = collectIntervalsPerChromosome(vcfs).flatten()
concatPerIntervalVcfs(vcfs_per_chrom_list).view()
vcfs_per_chrom = concatPerChromIntervalVcfs(vcfs_per_chrom_list).collect().view()
concatPerChromosomeVcfs(vcfs_per_chrom).view()
}
}

Expand Down
29 changes: 22 additions & 7 deletions callVariantsFromGenomicDBs.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@ include {
getVcfGenomicIntervals;
getGenomicIntervalList;
genotypeGvcfs;
getGenomicsdbWorkspaces;
createGenomicsDbPerInterval;
callVariantsFromGenomicsDB;
callVariantsFromExistingGenomicsDB;
collectIntervalsPerChromosome;
concatPerChromIntervalVcfs;
glnexusJointCaller;
convertBcfToVcf;
} from "${projectDir}/modules/variantCallingPipeline.nf"
Expand All @@ -22,12 +26,12 @@ workflow {

if(params.joint_caller == "gatk") {

channel.from(1..22,'X','Y','M')
.collect()
.flatten()
.map { chr -> "chr${chr}" }
.combine(gvcfList.toList())
.set { per_chrom_genomicsDB_input }
//channel.from(1..22,'X','Y','M')
// .collect()
// .flatten()
// .map { chr -> "chr${chr}" }
// .combine(gvcfList.toList())
// .set { per_chrom_genomicsDB_input }

if(params.interval == "NULL") {
genomicInterval = getVcfGenomicIntervals(gvcfList).flatten()
Expand All @@ -37,9 +41,20 @@ workflow {

}

workspace = getGenomicsdbWorkspaces().map { wrkspc -> tuple(wrkspc.simpleName, wrkspc) }
genomicInterval
.map { interval -> tuple(interval.simpleName + "_${params.output_prefix}-workspace", interval) }
.join(workspace)
.map {workspaceName, interval, workspace -> tuple(workspaceName, interval, workspace)}
.set { workspace_interval }
vcfs = callVariantsFromGenomicsDB(workspace_interval).collect()
vcfs_per_chrom_list = collectIntervalsPerChromosome(vcfs).flatten()
concatPerChromIntervalVcfs(vcfs_per_chrom_list).view()


//genomicsDB = createGenomicsDbPerChromosome(per_chrom_genomicsDB_input)
//genomicsDB = createGenomicsDbPerInterval(genomicInterval, gvcfList)
callVariantsFromGenomicsDB(genomicsDB).view()
//callVariantsFromGenomicsDB(genomicsDB).view()

/*
combinedGvcf = combineGvcfs(gvcfList)
Expand Down
1 change: 1 addition & 0 deletions configs/containers/singularity.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ includeConfig "${projectDir}/configs/containers/base.config"
singularity {
enabled = true
autoMounts = false
pullTimeout = 1.h
cacheDir = "${params.containers_dir}"
}

20 changes: 17 additions & 3 deletions configs/resource-selector.config
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,18 @@ process {
beforeScript = 'ulimit -c unlimited; hostname; date; pwd'

// ERROR HANDLING
errorStrategy = { task.exitStatus in [1,2,3,4,5,6,126,127,134,135,136,137,139,140,143,245,247,249,250,255] ? 'retry' : 'terminate' }
//errorStrategy = { task.exitStatus in [1,2,4,5,6,126,127,134,135,136,137,139,140,143,245,247,249,250,255] ? 'retry' : 'terminate' }
errorStrategy = { task.exitStatus in 3 ? 'ignore' : 'retry' }
maxErrors = '-1'
maxRetries = 20
maxRetries = 3

// UPDATING GENOMICS BDs
// DO NOT FAIL ON DUPLICATE SAMPLES: exit code 3
withLabel:updatedbserror {
errorStrategy = { task.exitStatus in 3 ? 'ignore' : 'terminate' }
maxErrors = '-1'
maxRetries = 3
}

// RESOURCE MANAGEMENT //
withLabel:smallMemory {
Expand Down Expand Up @@ -87,10 +96,15 @@ process {
cpus = { params.threads }
memory = 30.GB
}
withLabel:genomisDBImport {
time = { 24.h * task.attempt }
cpus = { params.threads }
memory = 150.GB
}
withLabel:variantCaller {
time = { 24.h * task.attempt }
cpus = { params.threads }
memory = 30.GB
memory = 50.GB
}
withLabel:vqsr {
cpus = { params.threads }
Expand Down
43 changes: 43 additions & 0 deletions includes/get_gvcfinterval_list.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env bash

######################################################
# make non-overlapping intervals of 5,000,000 bp for #
# chromosomes larger than 5,000,000. Otherwise, just #
# print out chromosome length as only interval. #
# HLA contigs will be automatically excluded. To do #
# HLA typing, use the SNP2HLA or equivalent tool #
######################################################

if [ $# -lt 3 ]; then
echo -e "Usage: get_interval_list.sh [gvcf-file] [interval-size] [output-prefix]"
else
gvcf=$1 #"/scratch/eshkev001/projects/wes/higenes/careni/hg38/gvcfs/OS.P001_92401_S25_val.bqsr.g.vcf.gz"
intval=$2
out=$3

zgrep '##contig' ${gvcf} | \
sed 's/[=,>]/\t/g' | \
cut -f3,5 | \
grep -v '^HLA' | \
awk \
-v intvl=${intval} '
{
if( $2 <= intvl ) {
print $1,"0",$2
} else{
for( i=0; i<=$2; i+=intvl ) {
if( i+(intvl-1) < $2 ) {
print $1,i,i+(intvl-1)
} else{
print $1,i,$2
}
}
}
}' \
> ${out}_interval_list.txt
fi

#while read interval; do
# echo $interval > $(echo ${interval} | sed 's/[:*]/_/g' | sed 's/ /_/g').bed
#done < .interval_list

Loading

0 comments on commit 0462c4a

Please sign in to comment.