Skip to content

Commit

Permalink
fixed validation command
Browse files Browse the repository at this point in the history
  • Loading branch information
friend1ws committed Jun 23, 2023
1 parent 20bf79f commit 9d1f66e
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 84 deletions.
18 changes: 15 additions & 3 deletions nanomonsv/arg_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,22 @@ def create_parser():
validate.add_argument("--var_read_min_mapq", default = 40, type = int,
help = "Threshould for mapping quality in validate step")

# validate.add_argument("--use_ssw_lib", default = False, action = 'store_true',
# help = "Use SSW Library. This is for backward compatibility, and may be removed in the future")
validate.add_argument("--validation_score_ratio_thres", default = 1.2, type = float,
help = "Threshould for threshould for SV segment validation by alignment")

validate.add_argument("--qv10", default = False, action = 'store_true',
help = "Parameter preset for sequencing data with a base quality of around 10. Recommended for ONT data called by Guppy before version 5")

validate.add_argument("--qv15", default = False, action = 'store_true',
help = "Parameter preset for sequencing data with a base quality of around 15. Recommended for ONT data called by Guppy version 5, 6.")

validate.add_argument("--qv20", default = False, action = 'store_true',
help = "Parameter preset for sequencing data with a base quality of around 20. Recommended for ONT data with Q20+ chemistry.")

validate.add_argument("--qv25", default = False, action = 'store_true',
help = "Parameter preset for sequencing data with a base quality above 25. Recommended for PacBio Hifi data.")

validate.add_argument("--sort_option", metavar = "-S 1G", type = str, default = "-S 1G",
validate.add_argument("--sort_option", metavar = "-S 1G", type = str, default = "-S 2G",
help = "options for sort command")

validate.add_argument("--debug", default = False, action = 'store_true', help = "keep intermediate files")
Expand Down
51 changes: 27 additions & 24 deletions nanomonsv/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,31 @@ def validate_main(args):

# executable check
# if False: libssw_check()


# parameter preset
if sum([int(x == True) for x in [args.qv10, args.qv15, args.qv20, args.qv25]]) > 1:
logger.error("Parameter preset (qv10, qv15, qv20, qv25) should be set only once")
sys.exit(1)

if args.qv10:
args.validation_score_ratio_thres = 1.2
elif args.qv15:
args.validation_score_ratio_thres = 1.4
elif args.qv20:
args.validation_score_ratio_thres = 1.6
elif args.qv25:
args.validation_score_ratio_thres = 1.8

# check existences
is_exists_bam(args.tumor_bam)
is_exists(args.reference_fasta)
if args.control_bam is not None: is_exists_bam(args.control_bam)

# BAM format check
bam_cram_format_check(args.tumor_bam, args.reference_fasta)
fasta_format_check(args.reference_fasta)
if args.control_bam is not None: bam_cram_format_check(args.control_bam, args.reference_fasta)


logger.info("Counting the number of supporting read for the tumor by realignment of SV candidate segments")
count_sread_by_alignment(args.sv_list_file, args.tumor_bam,
Expand All @@ -483,36 +507,15 @@ def validate_main(args):
logger.info("Final processing")
control_sread_count_file = args.output + ".realignment.control.sread_count.txt" if args.control_bam is not None else None
integrate_realignment_result(args.output + ".realignment.tumor.sread_count.txt", control_sread_count_file, args.output,
args.reference_fasta, 0, 0, float("inf"), float("inf"))
args.reference_fasta, min_indel_size = 0, min_tumor_variant_read_num = 0, min_tumor_VAF = 0,
max_control_variant_read_num = float("inf"), max_control_VAF = float("inf"))

if not args.debug:
os.remove(args.output + ".realignment.tumor.sread_count.txt")
os.remove(args.output + ".realignment.tumor.sread_info.txt")
os.remove(args.output + ".realignment.control.sread_count.txt")
os.remove(args.output + ".realignment.control.sread_info.txt")
####################
"""
long_read_validate_main(args.sv_list_file,
args.tumor_bam,
args.output + ".validated.txt",
args.output + ".validated.tumor_sread.txt",
args.reference_fasta,
args.control_bam,
args.var_read_min_mapq,
False, args.debug)
is_control = True if args.control_bam is not None else False
filt_final(args.output + ".validated.txt",
args.output + ".validated.tumor_sread.txt",
args.output,
args.output + ".supporting_read.txt",
0, 0, float("inf"), float("inf"), True, is_control)
if not args.debug:
subprocess.check_call(["rm", "-rf", args.output + ".validated.txt"])
subprocess.check_call(["rm", "-rf", args.output + ".validated.tumor_sread.txt"])
"""


def insert_classify_main(args):
Expand Down
13 changes: 6 additions & 7 deletions tests/data/test_tumor/test_tumor.nanomonsv.result.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
Chr_1 Pos_1 Dir_1 Chr_2 Pos_2 Dir_2 Inserted_Seq Checked_Read_Num_Tumor Supporting_Read_Num_Tumor Checked_Read_Num_Control Supporting_Read_Num_Control
chr10 87940538 + chr10 87952584 - AAGAGATTATACTTGTGTA 18 16 36 0
chr11 62010482 + chr11 62010483 - CTCAGGGAGTCAGTTCTAGACCAGCCTGGCCAACGGCAAAACCCCATTCTACTAAAAATAAAGAATTAGCCAGGCATGGTGGCAGTTACTGTCATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTGAACTCAGGAGTCAGAGGTTGCAGTGAGCCGAGATCATGCCACTGCACCCAGCCTGGGCAACACACAGCAAGACTCCATCTCAAAAAAAAAAAAAACGTTTTGGCAGGTGCGGTGGCTCATGCCTGTAATCCCAGCCTTTGGGAGGCCAGGCAGGGTGGATCACTAGGTCAGAGATCGAGACCATCCTGACTAACATGGTGAAACCCCGTCTCTACTAAATGTACAAAAAATTAGCCGGGCATGATGGCAGGTGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGTGTGAACCGGGAGACAGAGCTAGAGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGCAACAGAGGCGAGACTCTGTCTCAAAAAAAAAAAAAAAAATTAGCTGGTGTGGTGGTGCATGCCTGAAGTCCCAGCTACTAGAAATCTGAGGTGGAGGATTGCTTGAGCCCAGGAGGTTGAGACTGCAGTGAGCCACTGGTGCCACTGCCCTCCAGCCTGGGCAACAGAGTGAGACCCCATCTCTAAAAATAAAAACATGGCTGGTAAGGTGGCTCACGCCAACCAGCACTTTGGGAGGCTGAGGCGGATGACC 24 4 18 0
chr12 129287232 + chr12 129287235 - TCTCACTCATAGGTGGGAATTGAACAATGAGATCACATGGACACAGGAAGGGAATATCACTCTGGGACTGTGGTGGGGTGGGGGGGGGGGGAGGGATAGCATTGGGAGATATACCTAATGCTAGATGACACATTAGTGGGTGCAGTGCACCAGCATGGCACATGTATACATATGTAACTAACCTGCACAATGTGCACATGTACCCTAAAACTTAGAGTATAATAAAAAAAAAAAAAAAAAATTAGCCGGGAAAAAAAAAAAAAAAAA 19 11 28 0
chr15 84141974 - chr7 151049571 + --- 46 10 37 0
chr18 68712223 + chr18 68715589 - --- 25 4 24 0
chrX 78265387 + chrX 78265388 - TGAGGGCCCTCACTTCCGCAGTAGGTGGCTGGCAGAGGCGCCCTGGTTCTGGACTGGGCGGCTGGCCAAGGGGCTGACCCCACCTCCTCCCGGACGGGTGGCTGGCCGTGGGGGCTGACTCCCACCTCCCTCCCGGACGGGCACTGGCATG 15 3 10 0
Chr_1 Pos_1 Dir_1 Chr_2 Pos_2 Dir_2 Inserted_Seq SV_ID Checked_Read_Num_Tumor Supporting_Read_Num_Tumor Checked_Read_Num_Control Supporting_Read_Num_Control Is_Filter
chr10 87940538 + chr10 87952584 - ATAGAGATTATACTTTGTGTA r_0 18 16 36 0 PASS
chr11 62010482 + chr11 62010483 - CTCAGGGAGTCAGTTCTAGACCAGCCTGGCCAACGGCAAAACCCCATTCTACTAAAAATAAAGAATTAGCCAGGCATGGTGGCAGTTACTGTCATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTGAACTCAGGAGTCAGAGGTTGCAGTGAGCCGAGATCATGCCACTGCACCCAGCCTGGGCAACACACAGCAAGACTCCATCTCAAAAAAAAAAAAAACGTTTTGGCAGGTGCGGTGGCTCATGCCTGTAATCCCAGCCTTTGGGAGGCCAGGCAGGGTGGATCACTAGGTCAGAGATCGAGACCATCCTGACTAACATGGTGAAACCCCGTCTCTACTAAATGTACAAAAAATTAGCCGGGCATGATGGCAGGTGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGTGTGAACCGGGAGACAGAGCTAGAGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGCAACAGAGGCGAGACTCTGTCTCAAAAAAAAAAAAAAAAATTAGCTGGTGTGGTGGTGCATGCCTGAAGTCCCAGCTACTAGAAATCTGAGGTGGAGGATTGCTTGAGCCCAGGAGGTTGAGACTGCAGTGAGCCACTGGTGCCACTGCCCTCCAGCCTGGGCAACAGAGTGAGACCCCATCTCTAAAAATAAAAACATGGCTGGTAAGGTGGCTCACGCCAACCAGCACTTTGGGAGGCTGAGGCGGATGACC i_0 24 4 18 0 PASS
chr12 129287232 + chr12 129287235 - TCTCACTCATAGGTGGGAATTGAACAATGAGATCACATGGACACAGGAAGGGAATATCACTCTGGGACTGTGGTGGGGTGGGGGGGGGGGGAGGGATAGCATTGGGAGATATACCTAATGCTAGATGACACATTAGTGGGTGCAGTGCACCAGCATGGCACATGTATACATATGTAACTAACCTGCACAATGTGCACATGTACCCTAAAACTTAGAGTATAATAAAAAAAAAAAAAAAAAATTAGCCGGGAAAAAAAAAAAAAAAAA i_1 19 11 28 0 PASS
chr15 84141974 - chr7 151049571 + --- r_1 46 10 37 0 PASS
chr18 68712223 + chr18 68715589 - --- d_2 25 4 24 0 PASS
Loading

0 comments on commit 9d1f66e

Please sign in to comment.