Skip to content

Commit c3cb0ed

Browse files
committed
Cleaning up errors, adding restrictions
1 parent c62f156 commit c3cb0ed

File tree

14 files changed

+98
-64
lines changed

14 files changed

+98
-64
lines changed

neat/common/io.py

+21-14
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414
import gzip
1515
import logging
1616
import os
17+
import sys
18+
1719
from pathlib import Path
1820
from typing import Callable, Iterator, TextIO
1921
from Bio import bgzf
2022

21-
log = logging.getLogger(__name__)
23+
_LOG = logging.getLogger(__name__)
2224

2325

2426
def is_compressed(file: str | Path) -> bool:
@@ -85,9 +87,9 @@ def open_output(path: str | Path, mode: str = 'wt') -> Iterator[TextIO]:
8587
8688
Raises
8789
------
88-
FileExistsError
90+
3 = FileExistsError
8991
Raised if the output file already exists.
90-
PermissionError
92+
11 = PermissionError
9193
Raised if the calling process does not have adequate access rights to
9294
write to the output file.
9395
"""
@@ -100,7 +102,8 @@ def open_output(path: str | Path, mode: str = 'wt') -> Iterator[TextIO]:
100102
# bgzf is old code and doesn't use "xt" mode, annoyingly. This manual check should suffice.
101103
if mode == "xt":
102104
if output_path.exists():
103-
raise FileExistsError(f"file '{path}' already exists")
105+
_LOG.error(f"file '{path}' already exists")
106+
sys.exit(3)
104107
else:
105108
mode = "wt"
106109
open_ = bgzf.open
@@ -125,26 +128,28 @@ def validate_input_path(path: str | Path):
125128
126129
Raises
127130
------
128-
FileNotFoundError
131+
5 = FileNotFoundError
129132
Raised if the input file does not exist or is not a file.
130-
RuntimeError
133+
7 = RuntimeError
131134
Raised if the input file is empty.
132-
PermissionError
135+
9 = PermissionError
133136
Raised if the calling process has no read access to the file.
134137
"""
135138
path = Path(path)
136139
mssg = ''
137140

138141
if not path.is_file():
139142
mssg += f"Path '{path}' does not exist or not a file"
140-
raise FileNotFoundError(mssg)
143+
_LOG.error(mssg)
144+
sys.exit(5)
141145
stats = path.stat()
142146
if stats.st_size == 0:
143147
mssg += f"File '{path}' is empty"
144-
raise RuntimeError(mssg)
148+
_LOG.error(mssg)
149+
sys.exit(7)
145150
if not os.access(path, os.R_OK):
146151
mssg += f"cannot read from '{path}': access denied"
147-
raise PermissionError(mssg)
152+
_LOG.error(9)
148153

149154

150155
def validate_output_path(path: str | Path, is_file: bool = True, overwrite: bool = False):
@@ -161,18 +166,20 @@ def validate_output_path(path: str | Path, is_file: bool = True, overwrite: bool
161166
162167
Raises
163168
------
164-
FileExistsError
169+
3 = FileExistsError
165170
Raised if path is a file and already exists.
166-
PermissionError
171+
11 = PermissionError
167172
Raised if the calling process does not have adequate access rights to write to the path.
168173
"""
169174
path = Path(path)
170175
if is_file:
171176
if path.is_file() and not overwrite:
172-
raise FileExistsError(f"file '{path}' already exists")
177+
_LOG.error(f"file '{path}' already exists")
178+
sys.exit(3)
173179
else:
174180
if path.is_dir():
175181
if not os.access(path, os.W_OK):
176-
raise PermissionError(f"cannot write to '{path}', access denied")
182+
_LOG.error(f"cannot write to '{path}', access denied")
183+
sys.exit(11)
177184
else:
178185
path.parent.mkdir(parents=True, exist_ok=True)

neat/gen_mut_model/runner.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
import os.path
66
import pathlib
77
import pickle
8+
import sys
89

910
import numpy as np
1011
from Bio import SeqIO
1112

12-
1313
from pathlib import Path
1414
import logging
1515

@@ -81,7 +81,7 @@ def runner(reference_index,
8181

8282
if len(ignore) == len(reference_index):
8383
_LOG.error("No valid human chromosome names detected. Check contig names reference.")
84-
raise ValueError
84+
sys.exit(1)
8585

8686
# Pre-parsing to find all the matching chromosomes between ref and vcf
8787
_LOG.info('Processing VCF file...')
@@ -91,7 +91,7 @@ def runner(reference_index,
9191

9292
if not matching_variants or not matching_chromosomes:
9393
_LOG.error("No valid variants detected. Check names in vcf versus reference and/or bed.")
94-
raise ValueError
94+
sys.exit(1)
9595

9696
trinuc_ref_count, bed_track_length = count_trinucleotides(reference_index,
9797
bed,
@@ -100,7 +100,7 @@ def runner(reference_index,
100100

101101
if not trinuc_ref_count:
102102
_LOG.error("No valid trinucleotides detected in reference.")
103-
raise ValueError
103+
sys.exit(1)
104104

105105
"""
106106
Collect and analyze the data in the VCF file
@@ -155,7 +155,7 @@ def runner(reference_index,
155155

156156
else:
157157
_LOG.error(f'Ref allele in variant call does not match reference: {variant}')
158-
raise ValueError
158+
sys.exit(1)
159159

160160
else:
161161
indel_len = len(variant[3]) - len(variant[2])
@@ -214,7 +214,7 @@ def runner(reference_index,
214214
if not total_var:
215215
_LOG.error('Error: No valid variants were found, model could not be created. '
216216
'Check that names are compatible.')
217-
raise ValueError
217+
sys.exit(1)
218218

219219
# COMPUTE PROBABILITIES
220220

neat/gen_mut_model/utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ def convert_trinuc_transition_matrix(trans_probs):
315315
_LOG.error("Repeat Trinuc detected.")
316316
_LOG.debug(f'Error on {ALL_CONTEXTS[context]}: '
317317
f'{ALLOWED_NUCL[mutation_ref]} -> {ALLOWED_NUCL[mutation_alt]}')
318-
raise ValueError
318+
sys.exit(1)
319319

320320
return ret_matrix
321321

neat/model_fragment_lengths/runner.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pickle
77
import numpy as np
88
import logging
9+
import sys
910

1011
from pathlib import Path
1112

@@ -44,21 +45,21 @@ def compute_fraglen_runner(file: str | Path, filter_minreads: int, output: str |
4445
_LOG.info("Counting fragments")
4546
all_tlens = count_frags(input_file)
4647
if not all_tlens:
47-
raise ValueError("No valid template lengths in sam file_list.")
48+
_LOG.error("No valid template lengths in sam file_list.")
49+
sys.exit(1)
4850

4951
_LOG.info("Filtering fragments")
5052
filtered_lengths = filter_lengths(all_tlens, filter_minreads)
5153

5254
if not filtered_lengths:
53-
raise ValueError("No data passed the filter, nothing to calculate. Try adjusting the filter settings.")
55+
_LOG.error("No data passed the filter, nothing to calculate. Try adjusting the filter settings.")
56+
sys.exit(1)
5457

5558
_LOG.info("Building model")
5659
st_dev = float(np.std(filtered_lengths))
5760
mean = float(np.mean(filtered_lengths))
58-
max_tlen = max(filtered_lengths)
59-
min_tlen = min(filtered_lengths)
6061

61-
model = FragmentLengthModel(st_dev, mean, max_tlen, min_tlen)
62+
model = FragmentLengthModel(mean, st_dev)
6263
_LOG.info(f'Saving model: {output}')
6364
with gzip.open(output, 'w+') as outfile:
6465
pickle.dump(model, outfile)

neat/model_sequencing_error/utils.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import numpy as np
77
# TODO implement plotting
88
import matplotlib.pyplot as plt
9+
import sys
910

1011
from scipy.stats import mode
1112
from ..common import open_input
@@ -31,7 +32,8 @@ def convert_quality_string(qual_str: str, offset: int):
3132
try:
3233
ret_list.append(ord(qual_str[i]) - offset)
3334
except ValueError:
34-
raise ValueError("improperly formatted fastq file")
35+
_LOG.error("improperly formatted fastq file")
36+
sys.exit(1)
3537

3638
return ret_list
3739

@@ -45,7 +47,8 @@ def expand_counts(count_array: list, scores: list):
4547
:return np.ndarray: a one-dimensional array reflecting the expanded count
4648
"""
4749
if len(count_array) != len(scores):
48-
raise ValueError("Count array and scores have different lengths.")
50+
_LOG.error("Count array and scores have different lengths.")
51+
sys.exit(1)
4952

5053
ret_list = []
5154
for i in range(len(count_array)):
@@ -89,7 +92,8 @@ def parse_file(input_file: str, quality_scores: list, max_reads: int, qual_offse
8992
if readlen_mode.count < (0.5 * len(readlens)):
9093
_LOG.warning("Highly variable read lengths detected. Results may be less than ideal.")
9194
if readlen_mode.count < 20:
92-
raise ValueError(f"Dataset is too scarce or inconsistent to make a model. Try a different input.")
95+
_LOG.error(f"Dataset is too scarce or inconsistent to make a model. Try a different input.")
96+
sys.exit(1)
9397
read_length = int(readlen_mode.mode)
9498

9599
else:

neat/models/models.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import re
88
import logging
99
import abc
10+
import sys
1011

1112
from numpy.random import Generator
1213
from Bio.Seq import Seq
@@ -235,7 +236,8 @@ def __init__(self,
235236
self.homozygous_freq = homozygous_freq
236237

237238
if not np.isclose(sum(variant_probs.values()), 1):
238-
raise ValueError("Probabilities do not add up to 1.")
239+
_LOG.error("Probabilities do not add up to 1.")
240+
sys.exit(1)
239241

240242
self.variant_probs = variant_probs
241243
self.transition_matrix = transition_matrix
@@ -558,12 +560,10 @@ def __init__(self,
558560
self.rng = rng
559561

560562
def generate_fragments(self,
561-
total_length: int,
562563
number_of_fragments: int) -> list:
563564
"""
564565
Generates the requested number of fragments, based on the mean and standard deviation of the set
565566
566-
:param total_length: Length of the reference segment we are covering.
567567
:param number_of_fragments: The number of fragments needed.
568568
:return: A list of random fragment lengths sampled from the model.
569569
"""

neat/read_simulator/utils/bed_func.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""
55
import logging
66
import pathlib
7+
import sys
78

89
from Bio.File import _IndexedSeqFileDict
910

@@ -103,7 +104,7 @@ def parse_single_bed(input_bed: str,
103104
[my_chr, pos1, pos2] = line_list[:3]
104105
except ValueError:
105106
_LOG.error(f"Improperly formatted bed file line {line}")
106-
raise
107+
sys.exit(1)
107108
# Bed file chromosome names must match the reference.
108109
try:
109110
assert my_chr in reference_dictionary
@@ -122,7 +123,7 @@ def parse_single_bed(input_bed: str,
122123
_LOG.error(f"Invalid mutation rate: {my_chr}: ({pos1}, {pos2})")
123124
_LOG.error('4th column of mutation rate bed must be a semicolon list of key, value '
124125
'pairs, with one key being mut_rate, e.g., "foo=bar;mut_rate=0.001;do=re".')
125-
raise ValueError
126+
sys.exit(1)
126127

127128
# +9 because that's len('mut_rate='). Whatever follows that should be our mutation rate.
128129
mut_rate = line_list[3][index + 9:]
@@ -133,7 +134,7 @@ def parse_single_bed(input_bed: str,
133134
_LOG.error(f"Invalid mutation rate: {my_chr}: ({pos1}, {pos2})")
134135
_LOG.error('4th column of mutation rate bed must be a semicolon list of key, value '
135136
'pairs, with one key being mut_rate, e.g., "foo=bar;mut_rate=0.001;do=re".')
136-
raise
137+
sys.exit(1)
137138

138139
if mut_rate > 0.3:
139140
_LOG.warning("Found a mutation rate > 0.3. This is unusual.")

neat/read_simulator/utils/generate_reads.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ def cover_dataset(
4343
number_reads = ceil((span_length * options.coverage) / options.read_len)
4444

4545
# We use fragments to model the DNA
46-
fragment_pool = fragment_model.generate_fragments(span_length, number_reads * 3)
46+
fragment_pool = fragment_model.generate_fragments(number_reads * 3)
4747

48-
# step 1: Divide the span up into segments drawn froam the fragment pool. Assign reads based on that.
48+
# step 1: Divide the span up into segments drawn from the fragment pool. Assign reads based on that.
4949
# step 2: repeat above until number of reads exceeds number_reads * 1.5
5050
# step 3: shuffle pool, then draw number_reads (or number_reads/2 for paired ended) reads to be our reads
5151
read_count = 0

neat/read_simulator/utils/generate_variants.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import time
99
import numpy as np
1010
import re
11+
import sys
1112

1213
from Bio import SeqRecord
1314
from numpy.random import Generator
@@ -207,7 +208,8 @@ def generate_variants(reference: SeqRecord,
207208
temp_variant = mutation_model.generate_snv(trinuc, location)
208209

209210
else:
210-
raise ValueError(f"Attempting to create an unsupported variant: {variant_type}")
211+
_LOG.error(f"Attempting to create an unsupported variant: {variant_type}")
212+
sys.exit(1)
211213

212214
# pick which ploid is mutated
213215
temp_variant.genotype = pick_ploids(options.ploidy, mutation_model.homozygous_freq, 1, options.rng)
@@ -226,7 +228,8 @@ def generate_variants(reference: SeqRecord,
226228
# Here's a counter to make sure we're not getting stuck on a single location
227229
debug += 1
228230
if debug > 1000000:
229-
raise RuntimeError("Check this if, as it may be causing an infinite loop.")
231+
_LOG.error("Check this if, as it may be causing an infinite loop.")
232+
sys.exit(999)
230233
# No suitable place to put this, so we skip.
231234
continue
232235
# This sets up a probability array with weights 1 for open spots (x==0) and 0 elsewhere

0 commit comments

Comments
 (0)