|
21 | 21 | from .default_mutation_model import *
|
22 | 22 | from .default_sequencing_error_model import *
|
23 | 23 | from .default_gc_bias_model import *
|
24 |
| -from .default_fraglen_model import * |
25 | 24 | from .utils import bin_scores, take_closest
|
26 | 25 |
|
27 | 26 | __all__ = [
|
@@ -600,53 +599,28 @@ class FragmentLengthModel:
|
600 | 599 |
|
601 | 600 | :param fragment_mean: the mean of the collection of fragment lengths derived from data
|
602 | 601 | :param fragment_std: the standard deviation of the collection of fragment lengths derived from data
|
603 |
| - :param fragment_max: the largest fragment observed in the data |
604 |
| - :param fragment_min: the smallest fragment observed in data |
605 | 602 | :param rng: the random number generator for the run
|
606 | 603 | """
|
607 | 604 |
|
def __init__(self,
             fragment_mean: float,
             fragment_std: float,
             rng: Generator = None):
    """
    Store the parameters of the fragment-length distribution.

    :param fragment_mean: the mean of the collection of fragment lengths derived from data
    :param fragment_std: the standard deviation of the collection of fragment lengths derived from data
    :param rng: the random number generator for the run; when None, a fresh
        default generator is created so that sampling still works.
    """
    self.fragment_mean = fragment_mean
    self.fragment_st_dev = fragment_std
    # Guard against rng=None: generate_fragments calls self.rng.normal(...),
    # which would raise AttributeError if a bare None were stored here.
    self.rng = rng if rng is not None else np.random.default_rng()
|
619 | 612 |
|
def generate_fragments(self,
                       total_length: int,
                       number_of_fragments: int) -> list:
    """
    Sample random fragment lengths from the model, assuming normality.

    :param total_length: Length of the reference segment we are covering.
        NOTE(review): currently unused by this implementation — kept for
        interface compatibility with existing callers; confirm before removing.
    :param number_of_fragments: The number of fragments needed.
    :return: A list of random fragment lengths sampled from the model.
    """
    # Draw from N(mean, st_dev), then round the result and convert to ints.
    dist = np.round(self.rng.normal(self.fragment_mean, self.fragment_st_dev, size=number_of_fragments)).astype(int)
    # A normal draw can produce zero or negative values; clamp to a minimum
    # of 1 so downstream sequence slicing never sees a nonpositive length.
    return list(np.maximum(dist, 1))
0 commit comments