Skip to content

Commit

Permalink
coming soon manual patches
Browse files Browse the repository at this point in the history
  • Loading branch information
ncherric committed Aug 29, 2023
1 parent cd2b993 commit c88eab9
Show file tree
Hide file tree
Showing 5 changed files with 476 additions and 0 deletions.
104 changes: 104 additions & 0 deletions workflow/rules/cram_variantCalling-Discovery.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
CHROMS=list(range(1, 23))
CHROMS.extend(['X'])

# conditional statement in sequenceChrStringCheck.smk to either proceed with 1 or add "chr" for "chr1"
rule check_for_chr_string:
input:
SMP = "results/cram/{sample}.cram",
output:
ChrStrCheck="results/vcf/{sample}/chrStrCheck/alignmentFileHeader.out",
params:
sample=lambda wc: wc.get("sample"),
resources:
mem_mb=1500,
runtime="00:10:00",
benchmark:
repeat("benchmarks/check_for_chr_string.{sample}.RawSeq", 1)
script:
"../scripts/chr-string-check-cram.py"

rule cram_variant_calling:
input:
cram="results/cram/{sample}.cram",
cramIndex="results/cram/{sample}.cram.crai",
ref = which_ref,
chrStrCheck="results/vcf/{sample}/chrStrCheck/alignmentFileHeader.out",
regions=CHROMS,
output:
splitChrVCF="results/vcf/{sample}/chr{chroms}.vcf.gz",
params:
sample=lambda wc: wc.get("sample"),
chroms_from_wc=lambda wc: wc.get("chroms"),
extra_mpileup=get_variant_calling_mpileup_params,
extra_call=get_variant_calling_call_params,
extra_NormTrueFalse=get_NormTrueFalse,
extra_normalize=get_NormLeftAlign,
resources:
mem_mb=3500,
runtime="24:00:00",
benchmark:
repeat("benchmarks/cram_variant_calling.{sample}.chr{chroms}.StoredSequence", 1)
script:
"../scripts/cram-variant-calling-discovery.py"


rule variant_calling_index:
input:
ChrVCF="results/vcf/{sample}/chr{chroms}.vcf.gz",
output:
ChrTBI="results/vcf/{sample}/chr{chroms}.vcf.gz.tbi",
resources:
mem_mb=1500,
runtime="00:30:00",
benchmark:
repeat("benchmarks/variant_calling_index.{sample}.chr{chroms}.StoredSequence", 1)
shell:
"""
bcftools index --tbi {input.ChrVCF} -o {output.ChrTBI}
"""


rule merge_sample_chrom_list:
input:
concatChrVCF=expand("results/vcf/{sample}/chr{{chroms}}.vcf.gz", sample=cramSamples["cramSample"]),
concatChrTBI=expand("results/vcf/{sample}/chr{{chroms}}.vcf.gz.tbi", sample=cramSamples["cramSample"]),
output:
mergeList="results/vcf/chr{chroms}-merge-sampleList.txt",
resources:
mem_mb=200,
runtime="00:10:00",
benchmark:
repeat("benchmarks/merge_sample_chrom_list.chr{chroms}.StoredSequence", 1)
script:
"../scripts/merge-sample-chrom-list-discovery.py"

rule merge_samples_per_chrom:
input:
mergeList="results/vcf/chr{chroms}-merge-sampleList.txt",
output:
mergeChrVCF="results/vcf/Merged-chr{chroms}-allSamples.vcf.gz",
params:
chroms_from_wc=lambda wc: wc.get("chroms"),
resources:
mem_mb=10000,
runtime="00:10:00",
benchmark:
repeat("benchmarks/merge_samples_per_chrom.chr{chroms}.StoredSequence", 1)
script:
"../scripts/merge-sample-chroms.py"


rule merge_samples_per_chrom_index:
input:
mergeChrVCF="results/vcf/Merged-chr{chroms}-allSamples.vcf.gz",
output:
mergeChrTBI="results/vcf/Merged-chr{chroms}-allSamples.vcf.gz.tbi",
resources:
mem_mb=1500,
runtime="00:30:00",
benchmark:
repeat("benchmarks/merge_samples_per_chrom_index.chr{chroms}.StoredSequence", 1)
shell:
"""
bcftools index --tbi {input.mergeChrVCF} -o {output.mergeChrTBI}
"""
102 changes: 102 additions & 0 deletions workflow/rules/variantCalling-Discovery.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
CHROMS=list(range(1, 23))
CHROMS.extend(['X'])

# conditional statement in sequenceChrStringCheck.smk to either proceed with 1 or add "chr" for "chr1"
rule check_for_chr_string:
input:
sortedBam="results/sortedBam/{sample}.sorted.bam",
bamIndex="results/sortedBam/{sample}.sorted.bam.bai",
output:
ChrStrCheck="results/vcf/{sample}/chrStrCheck/alignmentFileHeader.out",
params:
sample=lambda wc: wc.get("sample"),
resources:
mem_mb=1500,
runtime="00:10:00",
benchmark:
repeat("benchmarks/check_for_chr_string.{sample}.RawSeq", 1)
script:
"../scripts/chr-string-check-bam.py"


rule variant_calling:
input:
sortedbam="results/sortedBam/{sample}.sorted.bam",
bamIndex="results/sortedBam/{sample}.sorted.bam.bai",
chrStrCheck="results/vcf/{sample}/chrStrCheck/alignmentFileHeader.out",
ref=which_ref,
regions=CHROMS,
output:
ChrVCF="results/vcf/{sample}/chr{chroms}.vcf.gz",
resources:
mem_mb=2500,
runtime="24:00:00",
params:
sample=lambda wc: wc.get("sample"),
extra_mpileup=get_variant_calling_mpileup_params,
extra_call=get_variant_calling_call_params,
extra_NormTrueFalse=get_NormTrueFalse,
extra_normalize=get_NormLeftAlign,
benchmark:
repeat("benchmarks/variant_calling.{sample}.chr{chroms}.RawSeq", 1)
script:
"../scripts/variant-calling-discovery.py"

rule variant_calling_index:
input:
ChrVCF="results/vcf/{sample}/chr{chroms}.vcf.gz",
output:
ChrTBI="results/vcf/{sample}/chr{chroms}.vcf.gz.tbi",
resources:
mem_mb=1500,
runtime="06:00:00",
benchmark:
repeat("benchmarks/variant_calling_index.{sample}.chr{chroms}.RawSeq", 1)
shell:
"""
bcftools index --tbi {input.ChrVCF} -o {output.ChrTBI}
"""

rule merge_sample_chrom_list:
input:
concatChrVCF=expand("results/vcf/{sample}/chr{{chroms}}.vcf.gz", sample=samples["sample"]),
concatChrTBI=expand("results/vcf/{sample}/chr{{chroms}}.vcf.gz.tbi", sample=samples["sample"]),
output:
mergeList="results/vcf/chr{chroms}-merge-sampleList.txt",
resources:
mem_mb=200,
runtime="00:10:00",
benchmark:
repeat("benchmarks/merge_sample_chrom_list.chr{chroms}.RawSeq", 1)
script:
"../scripts/merge-sample-chrom-list.py"

rule merge_samples_per_chrom:
input:
mergeList="results/vcf/chr{chroms}-merge-sampleList.txt",
output:
mergeChrVCF="results/vcf/Merged-chr{chroms}-allSamples.vcf.gz",
params:
chroms_from_wc=lambda wc: wc.get("chroms"),
resources:
mem_mb=10000,
runtime="00:10:00",
benchmark:
repeat("benchmarks/merge_samples_per_chrom.chr{chroms}.RawSeq", 1)
script:
"../scripts/merge-sample-chroms.py"

rule merge_samples_per_chrom_index:
input:
mergeChrVCF="results/vcf/Merged-chr{chroms}-allSamples.vcf.gz",
output:
mergeChrTBI="results/vcf/Merged-chr{chroms}-allSamples.vcf.gz.tbi",
resources:
mem_mb=1500,
runtime="00:30:00",
benchmark:
repeat("benchmarks/merge_samples_per_chrom_index.chr{chroms}.RawSeq", 1)
shell:
"""
bcftools index --tbi {input.mergeChrVCF} -o {output.mergeChrTBI}
"""
127 changes: 127 additions & 0 deletions workflow/scripts/cram-variant-calling-discovery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from os import path
from snakemake.shell import shell

# Extract arguments.
extra = snakemake.params.get("extra", "")
extra_mpileup = snakemake.params.get("extra_mpileup", "")
extra_call = snakemake.params.get("extra_call", "")
extra_NormTrueFalse = snakemake.params.get("extra_NormTrueFalse", "")
extra_normalize = snakemake.params.get("extra_normalize", "")

sample = snakemake.params.get("sample", "")
chromsWC = snakemake.params.get("chroms_from_wc", "")

reference = snakemake.input.ref
# comment block below if you have to add specific path to referene in cram_variantCalling.smk file
# if isinstance(reference, str):
# reference = path.splitext(snakemake.input.ref)[0]
# else:
# reference = path.splitext(snakemake.input.ref[0])[0]

log = snakemake.log_fmt_shell(stdout=False, stderr=True)

shell(
"mkdir -p results/vcf/{sample}/TempFiles/chr{chromsWC}"
)

def findSequenceName(Filename,Value):
with open(Filename) as f:
if Value in f.read(): # read if value is in file
print('chr1 is present, thus, chr string exists')
print('Adding chr to NYGC regions file')
return True
else:
return False
f.close()

Norm = extra_NormTrueFalse

filename = snakemake.input.chrStrCheck
value = 'SN:chr1'

chrStrYes = findSequenceName(filename,value)

if Norm == True:
if chrStrYes == True:
# keeps the chr string for "chr1"
shell(
"bcftools mpileup"
" -f {reference}"
" {extra_mpileup}"
" -r {snakemake.input.regions}"
" -O u"
" results/cram/{sample}.cram |"
" bcftools call"
" {extra_call}"
" -O u |"
" bcftools sort"
" -m 2G"
" -T results/vcf/{sample}/TempFiles/chr{chromsWC}"
" -O u |"
" bcftools norm -f {reference}"
" {extra_normalize}"
" -O z"
" -o {snakemake.output.ChrVCF}"
)

elif chrStrYes == False:
# removes the chr string for "1"
shell(
"bcftools mpileup"
" -f {reference}"
" {extra_mpileup}"
" -r {snakemake.input.regions}"
" -O u"
" results/cram/{sample}.cram |"
" bcftools call"
" {extra_call}"
" -O u |"
" bcftools sort"
" -m 2G"
" -T results/vcf/{sample}/TempFiles/chr{chromsWC}"
" -O u |"
" bcftools norm -f {reference}"
" {extra_normalize}"
" -O z"
" -o {snakemake.output.ChrVCF}"
)

elif Norm == False:
if chrStrYes == True:
# keeps the chr string for "chr1"
shell(
"bcftools mpileup"
" -f {reference}"
" {extra_mpileup}"
" -r {snakemake.input.regions}"
" -O u"
" results/cram/{sample}.cram |"
" bcftools call"
" {extra_call}"
" -O u |"
" bcftools sort"
" -m 2G"
" -T results/vcf/{sample}/TempFiles/chr{chromsWC}"
" -O z"
" -o {snakemake.output.ChrVCF}"
)

elif chrStrYes == False:
# removes the chr string for "1"
shell(
"bcftools mpileup"
" -f {reference}"
" {extra_mpileup}"
" -r {snakemake.input.regions}"
" -O u"
" results/cram/{sample}.cram |"
" bcftools call"
" {extra_call}"
" -O u |"
" bcftools sort"
" -m 2G"
" -T results/vcf/{sample}/TempFiles/chr{chromsWC}"
" -O z"
" -o {snakemake.output.ChrVCF}"
)

16 changes: 16 additions & 0 deletions workflow/scripts/merge-sample-chrom-list-discovery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from os import path
import re
import tempfile
from snakemake.shell import shell

# Extract arguments.
# extra = snakemake.params.get("extra", "")

log = snakemake.log_fmt_shell(stdout=False, stderr=True)
sed_arg = [ "sed 's/ /\\n/g'" ]

shell(
"echo {snakemake.input.ChrVCF} |"
" {sed_arg} - > {snakemake.output.mergeList}"
)

Loading

0 comments on commit c88eab9

Please sign in to comment.