@@ -313,21 +313,30 @@ def replace_n(segment: Seq, rng: Generator) -> Seq:
313
313
return Seq (modified_segment )
314
314
315
315
316
- def modify_target_coverage (included_regions : list , excluded_regions : list , coverage_vector : np .ndarray ):
316
+ def modify_target_coverage (target_regions : list , discard_regions : list , coverage_vector : np .ndarray , coverage : int ):
317
317
"""
318
318
Modifies the coverage vector by applying the list of regions. For this version, areas
319
319
outside the regions have coverage adjusted by the off_target_percent
320
320
321
- :param included_regions : A list of intervals to target, extracted from a bed file
322
- :param excluded_regions : A list of regions to throw out, extracted from a bed file
321
+ :param target_regions : A list of intervals to target, extracted from a bed file
322
+ :param discard_regions : A list of regions to throw out, extracted from a bed file
323
323
:param coverage_vector: The target coverage vector, which will be modified
324
324
:return: The updated target coverage vector.
325
325
"""
326
326
327
327
# this will tabulate values for included regions first, then for excluded regions. Hopefully both are not present.
328
328
# If only one is present, the other should not change it.
329
- for region in included_regions + excluded_regions :
330
- coverage_vector [region [0 ]: region [1 ]] = coverage_vector [region [0 ]: region [1 ]] * region [2 ]
329
+
330
+ for region in target_regions :
331
+ # in the included regions, any are marked false is to be excluded. Everything else remains untouched
332
+ if not region [2 ]:
333
+ coverage_vector [region [0 ]: region [1 ]] = [0 ] * (region [1 ] - region [0 ])
334
+
335
+ for region in discard_regions :
336
+ # in the discard regions section, a True indicates this region falls
337
+ # within a discard bed and should be discarded
338
+ if region [2 ]:
339
+ coverage_vector [region [0 ]: region [1 ]] = [0 ] * (region [1 ] - region [0 ])
331
340
332
341
return coverage_vector
333
342
@@ -388,18 +397,17 @@ def generate_reads(reference: SeqRecord,
388
397
start = time .time ()
389
398
390
399
base_name = f'{ Path (options .output ).name } -{ chrom } '
391
- # We want to bin the coverage into window-sized segments to speed up calculations.
392
- # This divides the segment into len(reference) // window_size rounded up to the nearest int.
393
- target_shape = ceil (len (reference ) // gc_bias .window_size )
394
400
# Assume uniform coverage
395
- target_coverage_vector = np .full (shape = target_shape , fill_value = options .coverage )
401
+ target_coverage_vector = np .full (shape = len ( reference ) , fill_value = options .coverage )
396
402
if not options .no_coverage_bias :
397
403
pass
398
404
# I'm trying to move this section into cover_dataset.
399
405
# target_coverage_vector = gc_bias.create_coverage_bias_vector(target_coverage_vector, reference.seq.upper())
400
406
401
407
# Apply the targeting/discarded rates.
402
- target_coverage_vector = modify_target_coverage (targeted_regions , discarded_regions , target_coverage_vector )
408
+ target_coverage_vector = modify_target_coverage (
409
+ targeted_regions , discarded_regions , target_coverage_vector , options .coverage
410
+ )
403
411
404
412
_LOG .debug ("Covering dataset." )
405
413
t = time .process_time ()
0 commit comments