
Commit 866fbd7  (parent: e96fd20)

Fix for the fastq2 path failing unlink (deletion), and removal of some debugging code.

9 files changed: +53, -201 lines
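The unlink (deletion) failure the commit message refers to lives in a file that is not part of the diff excerpt below. For context only, a typical guard for that failure mode (deleting a read-2 temp file that may never have been created in a single-ended run) looks like the following sketch; the function and parameter names here are hypothetical, not NEAT's code.

```python
from pathlib import Path
from typing import Optional


def cleanup_temp_fastqs(fastq1: Path, fastq2: Optional[Path]) -> None:
    """Hypothetical cleanup helper: remove temp fastq files without assuming
    that the read-2 file exists (it will not for single-ended runs)."""
    fastq1.unlink(missing_ok=True)      # missing_ok=True suppresses FileNotFoundError
    if fastq2 is not None:
        fastq2.unlink(missing_ok=True)
```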

config_template/simple_template.yml  (+1, -1)

```diff
@@ -23,6 +23,6 @@ discard_bed: .
 mutation_rate: .
 mutation_bed: .
 rng_seed: .
-min_mutations: 0
+min_mutations: .
 overwrite_output: .
 
```

neat/read_simulator/runner.py  (+8, -29)

```diff
@@ -1,7 +1,7 @@
 """
 Runner for generate_reads task
 """
-import copy
+import time
 import logging
 import pickle
 import gzip
@@ -10,7 +10,7 @@
 from pathlib import Path
 
 from .utils import Options, parse_input_vcf, parse_beds, OutputFileWriter, \
-    generate_variants, write_local_file, generate_reads
+    generate_variants, generate_reads
 from ..common import validate_input_path, validate_output_path
 from ..models import MutationModel, SequencingErrorModel, FragmentLengthModel
 from ..models.default_cancer_mutation_model import *
@@ -249,33 +249,24 @@ def read_simulator_runner(config: str, output: str):
     # these will be the features common to each contig, for multiprocessing
     common_features = {}
 
-    all_variants = {}  # dict of all ContigVariants objects, indexed by contig, which we will collect at the end.
-    vcf_files = []
+    local_variant_files = {}
     fastq_files = []
 
     sam_reads_files = []
 
     for contig in breaks:
+        local_variant_files[contig] = None
 
         _LOG.info(f"Generating variants for {contig}")
 
-        # Todo genericize breaks
-
         input_variants = input_variants_dict[contig]
         # TODO: add the ability to pick up input variants here from previous loop
 
         local_reference = reference_index[contig]
 
-        # _LOG.info(f'Creating trinucleotide map for {contig}...')
-        # local_trinuc_map = map_chromosome(local_reference, mut_model)
-
         # Since we're only running single threaded for now:
         threadidx = 1
 
-        local_variant_file = options.temp_dir_path / f'{options.output.stem}_tmp_{contig}_{threadidx}.vcf.gz'
-
-        _LOG.debug(f'local vcf filename = {local_variant_file}')
-
         local_bam_pickle_file = None
         if options.produce_bam:
             local_bam_pickle_file = options.temp_dir_path / f'{options.output.stem}_tmp_{contig}_{threadidx}.p.gz'
@@ -296,20 +287,8 @@ def read_simulator_runner(config: str, output: str):
             max_qual_score=max_qual_score,
             options=options)
 
-        _LOG.info(f'Outputting temp vcf for {contig} for later use')
-        # This function produces the local vcf file.
-        # TODO pickle dump the ContigVariants object instead. Combine them into one vcf
-        #  at the end.
-        write_local_file(
-            local_variant_file,
-            local_variants,
-            local_reference,
-            target_regions_dict[contig],
-            discard_regions_dict[contig]
-        )
-
-        # The above function writes data to local_variant_file, so we need only store its location.
-        vcf_files.append(local_variant_file)
+        # This function saves the local variant data a dictionary. We may need to write this to file.
+        local_variant_files[contig] = local_variants
 
         if options.produce_fastq or options.produce_bam:
             read1_fastq_paired, read1_fastq_single, read2_fastq_paired, read2_fastq_single = \
@@ -333,15 +312,15 @@ def read_simulator_runner(config: str, output: str):
 
     if options.produce_vcf:
         _LOG.info(f"Outputting golden vcf: {str(output_file_writer.vcf_fn)}")
-        output_file_writer.merge_temp_vcfs(vcf_files)
+        output_file_writer.write_final_vcf(local_variant_files, reference_index)
 
     if options.produce_fastq:
         if options.paired_ended:
             _LOG.info(f"Outputting fastq files: "
                       f"{', '.join([str(x) for x in output_file_writer.fastq_fns]).strip(', ')}")
         else:
             _LOG.info(f"Outputting fastq file: {output_file_writer.fastq_fns[0]}")
-        output_file_writer.merge_temp_fastqs(fastq_files, options.paired_ended, options.rng)
+        output_file_writer.merge_temp_fastqs(fastq_files, options.rng)
 
     if options.produce_bam:
         _LOG.info(f"Outputting golden bam file: {str(output_file_writer.bam_fn)}")
```

neat/read_simulator/utils/__init__.py  (-1)

```diff
@@ -9,4 +9,3 @@
 from .vcf_func import *
 from .generate_reads import *
 from .generate_variants import *
-from .local_file_writer import *
```

neat/read_simulator/utils/generate_reads.py  (+4, -7)

```diff
@@ -1,7 +1,6 @@
 import logging
 import time
 import pickle
-import numpy as np
 
 from math import ceil
 from pathlib import Path
@@ -14,8 +13,6 @@
 from ...variants import ContigVariants
 from .read import Read
 
-# TODO check that we're not truncating reads with deletions, but getting a full 151 bases
-
 __all__ = [
     'generate_reads',
     'cover_dataset',
@@ -199,20 +196,20 @@ def generate_reads(reference: SeqRecord,
     base_name = f'NEAT-generated_{chrom}'
 
     _LOG.debug("Covering dataset.")
-    t = time.process_time()
+    t = time.time()
     reads = cover_dataset(
         len(reference),
         options,
         fraglen_model,
     )
-    _LOG.debug(f"Dataset coverage took: {(time.process_time() - t)/60:.2f} m")
+    _LOG.debug(f"Dataset coverage took: {(time.time() - t)/60:.2f} m")
 
     # These will hold the values as inserted.
     properly_paired_reads = []
     singletons = []
 
     _LOG.debug("Writing fastq(s) and optional tsam, if indicated")
-    t = time.process_time()
+    t = time.time()
     with (
         open_output(chrom_fastq_r1_paired) as fq1_paired,
         open_output(chrom_fastq_r1_single) as fq1_single,
@@ -361,7 +358,7 @@ def generate_reads(reference: SeqRecord,
                 else:
                     singletons.append((None, read_2))
 
-    _LOG.info(f"Contig fastq(s) written in: {(time.process_time() - t)/60:.2f} m")
+    _LOG.info(f"Contig fastq(s) written in: {(time.time() - t)/60:.2f} m")
 
     if options.produce_bam:
         # this will give us the proper read order of the elements, for the sam. They are easier to sort now
```
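The timer switch above changes what the log messages measure: time.process_time() counts only CPU time of the current process, so time spent blocked on disk I/O while writing the fastqs is invisible to it, whereas time.time() reports elapsed wall-clock time. A quick standalone illustration:

```python
import time

t_wall = time.time()
t_cpu = time.process_time()

time.sleep(1)          # stands in for waiting on I/O; consumes almost no CPU

print(f"wall clock elapsed: {time.time() - t_wall:.2f} s")         # ~1.00 s
print(f"CPU time elapsed:   {time.process_time() - t_cpu:.2f} s")  # ~0.00 s
```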

neat/read_simulator/utils/generate_variants.py  (+1, -1)

```diff
@@ -56,7 +56,7 @@ def generate_variants(reference: SeqRecord,
                       existing_variants: ContigVariants,
                       mutation_model: MutationModel,
                       options: Options,
-                      max_qual_score: int):
+                      max_qual_score: int) -> ContigVariants:
     """
     This function will generate variants to add to the dataset, by writing them to the input temp vcf file.
 
```
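The only change here is an explicit return annotation. A minimal sketch of what that buys the caller in runner.py (stand-in types, not NEAT's code): a static checker can now confirm that local_variant_files maps each contig to a ContigVariants object rather than to a temp-file path.

```python
from typing import Dict, Optional


class ContigVariants:          # stand-in for neat.variants.ContigVariants
    pass


def generate_variants_stub() -> ContigVariants:   # mirrors the new annotation
    return ContigVariants()


local_variant_files: Dict[str, Optional[ContigVariants]] = {"chr1": None}
local_variant_files["chr1"] = generate_variants_stub()   # type-checks cleanly
```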

neat/read_simulator/utils/local_file_writer.py  (-114)

This file was deleted.

neat/read_simulator/utils/options.py  (+4, -6)

```diff
@@ -207,13 +207,11 @@ def read(self):
 
             # Now we check that the type is correct and it is in range, depending on the type defined for it
             # If it passes that it gets put into the args dictionary.
-            try:
-                temp = type_of_var(value)
-            except ValueError:
-                raise ValueError(f"Incorrect type for value entered for {key}: {type_of_var}")
+            if value != type_of_var(value):
+                raise ValueError(f"Incorrect type for value entered for {key}: {type_of_var} (found: {value})")
 
-            self.check_and_log_error(key, temp, criteria1, criteria2)
-            self.args[key] = temp
+            self.check_and_log_error(key, value, criteria1, criteria2)
+            self.args[key] = value
 
     def set_random_seed(self):
         """
```
