Skip to content

Commit 75b74da

Browse files
committed
Fixing discard bed regions
1 parent 4af7fb3 commit 75b74da

File tree

1 file changed

+18
-10
lines changed

1 file changed

+18
-10
lines changed

neat/read_simulator/utils/generate_reads.py

+18-10
Original file line numberDiff line numberDiff line change
@@ -313,21 +313,30 @@ def replace_n(segment: Seq, rng: Generator) -> Seq:
313313
return Seq(modified_segment)
314314

315315

316-
def modify_target_coverage(included_regions: list, excluded_regions: list, coverage_vector: np.ndarray):
316+
def modify_target_coverage(target_regions: list, discard_regions: list, coverage_vector: np.ndarray, coverage: int):
317317
"""
318318
Modifies the coverage vector by applying the list of regions. For this version, areas
319319
outside the regions have coverage adjusted by the off_target_percent
320320
321-
:param included_regions: A list of intervals to target, extracted from a bed file
322-
:param excluded_regions: A list of regions to throw out, extracted from a bed file
321+
:param target_regions: A list of intervals to target, extracted from a bed file
322+
:param discard_regions: A list of regions to throw out, extracted from a bed file
323323
:param coverage_vector: The target coverage vector, which will be modified
324324
:return: The updated target coverage vector.
325325
"""
326326

327327
# this will tabulate values for included regions first, then for excluded regions. Hopefully both are not present.
328328
# If only one is present, the other should not change it.
329-
for region in included_regions + excluded_regions:
330-
coverage_vector[region[0]: region[1]] = coverage_vector[region[0]: region[1]] * region[2]
329+
330+
for region in target_regions:
331+
# in the included regions, any are marked false is to be excluded. Everything else remains untouched
332+
if not region[2]:
333+
coverage_vector[region[0]: region[1]] = [0] * (region[1] - region[0])
334+
335+
for region in discard_regions:
336+
# in the discard regions section, a True indicates this region falls
337+
# within a discard bed and should be discarded
338+
if region[2]:
339+
coverage_vector[region[0]: region[1]] = [0] * (region[1] - region[0])
331340

332341
return coverage_vector
333342

@@ -388,18 +397,17 @@ def generate_reads(reference: SeqRecord,
388397
start = time.time()
389398

390399
base_name = f'{Path(options.output).name}-{chrom}'
391-
# We want to bin the coverage into window-sized segments to speed up calculations.
392-
# This divides the segment into len(reference) // window_size rounded up to the nearest int.
393-
target_shape = ceil(len(reference) // gc_bias.window_size)
394400
# Assume uniform coverage
395-
target_coverage_vector = np.full(shape=target_shape, fill_value=options.coverage)
401+
target_coverage_vector = np.full(shape=len(reference), fill_value=options.coverage)
396402
if not options.no_coverage_bias:
397403
pass
398404
# I'm trying to move this section into cover_dataset.
399405
# target_coverage_vector = gc_bias.create_coverage_bias_vector(target_coverage_vector, reference.seq.upper())
400406

401407
# Apply the targeting/discarded rates.
402-
target_coverage_vector = modify_target_coverage(targeted_regions, discarded_regions, target_coverage_vector)
408+
target_coverage_vector = modify_target_coverage(
409+
targeted_regions, discarded_regions, target_coverage_vector, options.coverage
410+
)
403411

404412
_LOG.debug("Covering dataset.")
405413
t = time.process_time()

0 commit comments

Comments
 (0)