From 226ee394a0629b2e4b0caacca2b6aa3583aa9a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Thu, 28 Jan 2021 10:56:29 +0100 Subject: [PATCH 1/3] clarify coverage_file arg (#191) --- doc/iss/generate.rst | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/iss/generate.rst b/doc/iss/generate.rst index 376c0c6..c6f6c50 100644 --- a/doc/iss/generate.rst +++ b/doc/iss/generate.rst @@ -135,13 +135,28 @@ Coverage distribution In the context of InSilicoSeq, the `abundance` is the proportion of reads in a sample, which since it does not acount for the length of the genome, does not necessarily reflect the number of organisms present in a sample. -The `coverage`and `coverage_file` options allow for simulating reads according to a coverage distribution instead of abundance. +The ``coverage`` and ``coverage_file`` options allow for simulating reads according to a coverage distribution instead of abundance. .. code-block:: bash iss generate --ncbi bacteria -U 50 --coverage lognormal -n 25M \ --model novaseq --output reads +The ``coverage_file`` option works similarly to the ``abundance_file`` option. +For two genomes A and B: + +.. code-block:: bash + + iss generate --genomes genomes.fasta --coverage_file coverage.txt \ + --model novaseq --output reads + +where, for a coverage of 20x for genome_A and 100x for genome_B, the coverage file ``coverage.txt`` contains: + +..
code-block:: bash + + genome_A 20 + genome_B 100 + GC bias ------- From 3a3ce937e246b49f37ae76c0a382a08767b85eb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 29 Jan 2021 12:29:42 +0100 Subject: [PATCH 2/3] fix rounding error (#195) --- iss/app.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/iss/app.py b/iss/app.py index 0dce452..24b7d8a 100644 --- a/iss/app.py +++ b/iss/app.py @@ -229,7 +229,8 @@ def generate_reads(args): f = open(genome_file, 'r') # re-opens the file with f: fasta_file = SeqIO.parse(f, 'fasta') - + total_reads_generated = 0 + total_reads_generated_unrouned = 0 for record in fasta_file: # generate reads for records try: @@ -252,9 +253,22 @@ def generate_reads(args): err_mod.read_length, genome_size ) - n_pairs = int(round( - (coverage * - len(record.seq)) / err_mod.read_length) / 2) + n_pairs_unrounded = ( + (coverage * len(record.seq)) / + err_mod.read_length) / 2 + n_pairs = round(n_pairs_unrounded) + + # check that the rounding does not cause to drop + # read pairs + total_reads_generated_unrouned += n_pairs_unrounded + total_reads_generated += n_pairs + if round(total_reads_generated_unrouned) > \ + total_reads_generated: + logger.debug( + "Adding a pair to correct rounding error") + n_pairs += 1 + total_reads_generated += 1 + # skip record if n_reads == 0 if n_pairs == 0: continue From 9662b8a17a7174c0583b131be5a3c05f1d28f5b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Mon, 1 Feb 2021 10:15:51 +0100 Subject: [PATCH 3/3] version bump (1.5.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Hadrien Gourlé --- iss/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iss/version.py b/iss/version.py index 51ed7c4..c3b3841 100644 --- a/iss/version.py +++ b/iss/version.py @@ -1 +1 @@ -__version__ = '1.5.1' +__version__ = '1.5.2'