Skip to content

Commit 9a84e12

Browse files
committed
A couple of small edits to vcf_func to improve run time. It seems the contig variants container is eating up all my memory; I am trying to debug that.
1 parent 0793f4d commit 9a84e12

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

neat/cli/commands/model_sequencing_error.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ def add_arguments(self, parser: argparse.ArgumentParser):
4949
nargs='+',
5050
required=False,
5151
default=42,
52-
help="Quality score max. The default 42. The lowest possible score is 1")
52+
help="Quality score max. The default 42. The lowest possible score is 1. To used binned"
53+
"scoring, enter a space separated list of scores, e.g., -Q 2 15 23 37")
5354

5455
parser.add_argument('-m',
5556
type=int,

neat/read_simulator/utils/vcf_func.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -148,18 +148,20 @@ def parse_input_vcf(input_dict: dict[str: ContigVariants],
148148
SAMPLE1 [9, optional]
149149
SAMPLE2 [10, optional, cancer only]
150150
"""
151-
# First, let's check if the chromosome for this record is even in the reference:
152-
in_ref = record[0] in reference
151+
# First, let's check if the chromosome for this record is even in the reference. Since input_dict is
152+
# constructed from the reference, the keys list is the same.
153+
in_ref = record[0] in input_dict.keys()
153154
if not in_ref:
154155
_LOG.warning(f'Skipping variant because the chromosome is not in the reference:\n{line}')
155156
continue
156157

158+
reference_string = reference[record[0]][int(record[1]): int(record[1]) + len(record[3])].seq.upper()
157159
# We already accounted for shifting to 0-based coordinates, so this should work.
158-
if record[3] != str(reference[record[0]][int(record[1]): int(record[1]) + len(record[3])].seq):
160+
if record[3] != str(reference_string):
159161
mismatched += 1
160162
_LOG.warning(f'Skipping variant because the ref field did not match the reference:'
161163
f'{record[0]}: {record[1]}, {record[3]} v '
162-
f'{reference[record[0]][int(record[1]): int(record[1]) + len(record[3])].seq}')
164+
f'{reference_string}')
163165
continue
164166

165167
# We'll need the genotype when we generate reads, and output the records, if applicable

0 commit comments

Comments
 (0)