From 23bfc2536200a9151da7e603f63b487269366d38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Mon, 13 Nov 2017 10:00:50 +0100 Subject: [PATCH 1/7] catch temp file not found --- iss/generator.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/iss/generator.py b/iss/generator.py index d6e98b6..8096f33 100644 --- a/iss/generator.py +++ b/iss/generator.py @@ -194,5 +194,10 @@ def cleanup(file_list): logger.info('Cleaning up') for temp_file in file_list: if temp_file is not None: - os.remove(temp_file + '_R1.fastq') - os.remove(temp_file + '_R2.fastq') + try: + os.remove(temp_file + '_R1.fastq') + os.remove(temp_file + '_R2.fastq') + except FileNotFoundError as e: + logger.error('Temporary file not found: %s' % temp_file) + logger.error('You may have to remove temporary files manually') + sys.exit(1) From 7c26ecdb6569c01455f3d5df68e7aa961770e6c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Mon, 13 Nov 2017 10:01:53 +0100 Subject: [PATCH 2/7] doesn't overwrite output fasta file with ncbi option --- iss/app.py | 1 + iss/util.py | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/iss/app.py b/iss/app.py index 07ac769..1f6cbca 100644 --- a/iss/app.py +++ b/iss/app.py @@ -62,6 +62,7 @@ def generate_reads(args): if args.genomes: genome_file = args.genomes elif args.ncbi and args.n_genomes: + util.genome_file_exists(args.output + '_genomes.fasta') genomes = download.ncbi(args.ncbi, args.n_genomes) genome_file = download.to_fasta(genomes, args.output) else: diff --git a/iss/util.py b/iss/util.py index 648e62f..a135552 100644 --- a/iss/util.py +++ b/iss/util.py @@ -6,6 +6,7 @@ from Bio import SeqIO +import os import sys import logging import numpy as np @@ -132,3 +133,14 @@ def convert_n_reads(unit): logger.error('%s is not a valid number of reads' % unit) sys.exit(1) return unit_int + + +def genome_file_exists(filename): + logger = logging.getLogger(__name__) + try: + assert 
os.path.exists(filename) == False + except AssertionError as e: + logger.error('%s already exists. Aborting.' % filename) + logger.error('Maybe use --genomes %s' % filename) + logger.error('or use --ncbi with another output prefix') + sys.exit(1) From 0a2e4204e015159ee6a986a4c0d289fe99f32c00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Mon, 13 Nov 2017 10:04:53 +0100 Subject: [PATCH 3/7] remove duplicates in temp list --- iss/app.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/iss/app.py b/iss/app.py index 1f6cbca..d0ffd80 100644 --- a/iss/app.py +++ b/iss/app.py @@ -149,8 +149,12 @@ def generate_reads(args): logger.error('iss generate interrupted: %s' % e) generator.cleanup(temp_file_list) else: - generator.concatenate(temp_file_list, args.output) - generator.cleanup(temp_file_list) + # remove the duplicates in file list and cleanup + # we remove the duplicates in case two records had the same header + # and reads were appended to the same temp file. + temp_file_unique = list(set(temp_file_list)) + generator.concatenate(temp_file_unique, args.output) + generator.cleanup(temp_file_unique) logger.info('Read generation complete') From 7ea66e4e44dfe2ed30af39635e9b8a32ff7fbdcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Mon, 13 Nov 2017 10:46:03 +0100 Subject: [PATCH 4/7] the default n_reads is now a string (fixes #40) --- iss/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iss/app.py b/iss/app.py index d0ffd80..bd141e5 100644 --- a/iss/app.py +++ b/iss/app.py @@ -281,7 +281,7 @@ def main(): '--n_reads', '-n', metavar='', - default=1000000, + default='1000000', help='Number of reads to generate (default: %(default)s). Allows \ suffixes k, K, m, M, g and G (ex 0.5M for 500000).' 
) From b8dec75917590b45cf428ca2a0b457c9b325c5c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Mon, 13 Nov 2017 10:51:19 +0100 Subject: [PATCH 5/7] add warning that model is ignored in mode basic --- iss/app.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/iss/app.py b/iss/app.py index bd141e5..5506fdf 100644 --- a/iss/app.py +++ b/iss/app.py @@ -52,6 +52,11 @@ def generate_reads(args): npz = args.model err_mod = kde.KDErrorModel(npz) elif args.mode == 'basic': + if args.model is not None: + logger.warning( + '--model %s will be ignored in --mode %s' % + (args.model, args.mode) + ) from iss.error_models import basic err_mod = basic.BasicErrorModel() except ImportError as e: From 2a776dc948b25daab038d4af87083079a44379c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Mon, 13 Nov 2017 13:21:12 +0100 Subject: [PATCH 6/7] save distribution of genomes to file --- iss/abundance.py | 20 ++++++++++++++++++++ iss/app.py | 24 ++++++++++-------------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/iss/abundance.py b/iss/abundance.py index d5d0ee5..04b3ce0 100644 --- a/iss/abundance.py +++ b/iss/abundance.py @@ -162,3 +162,23 @@ def to_coverage(total_n_reads, species_abundance, read_length, genome_size): n_reads = total_n_reads * species_abundance coverage = (n_reads * read_length) / genome_size return coverage + + +def to_file(abundance_dic, output): + """write the abundance dictionary to a file + + Args: + abundance_dic (dict): the abundance dictionary + output (str): the output file name + """ + logger = logging.getLogger(__name__) + output_abundance = output + '_abundance.txt' + try: + f = open(output_abundance, 'w') + except PermissionError as e: + logger.error('Failed to open output file: %s' % e) + sys.exit(1) + else: + with f: + for record, abundance in abundance_dic.items(): + f.write('%s\t%s\n' % (record, abundance)) diff --git a/iss/app.py b/iss/app.py index 5506fdf..8461e0a 100644 --- 
a/iss/app.py +++ b/iss/app.py @@ -85,25 +85,21 @@ def generate_reads(args): logger.error('Genome(s) file seems empty: %s' % genome_file) sys.exit(1) else: + abundance_dispatch = { + 'uniform': abundance.uniform, + 'halfnormal': abundance.halfnormal, + 'exponential': abundance.exponential, + 'lognormal': abundance.lognormal, + 'zero_inflated_lognormal': abundance.zero_inflated_lognormal + } # read the abundance file if args.abundance_file: logger.info('Using abundance file:%s' % args.abundance_file) abundance_dic = abundance.parse_abundance_file(args.abundance_file) - elif args.abundance == 'uniform': + elif args.abundance in abundance_dispatch: logger.info('Using %s abundance distribution' % args.abundance) - abundance_dic = abundance.uniform(record_list) - elif args.abundance == 'halfnormal': - logger.info('Using %s abundance distribution' % args.abundance) - abundance_dic = abundance.halfnormal(record_list) - elif args.abundance == 'exponential': - logger.info('Using %s abundance distribution' % args.abundance) - abundance_dic = abundance.exponential(record_list) - elif args.abundance == 'lognormal': - logger.info('Using %s abundance distribution' % args.abundance) - abundance_dic = abundance.lognormal(record_list) - elif args.abundance == 'zero_inflated_lognormal': - logger.info('Using %s abundance distribution' % args.abundance) - abundance_dic = abundance.zero_inflated_lognormal(record_list) + abundance_dic = abundance_dispatch[args.abundance](record_list) + abundance.to_file(abundance_dic, args.output) else: logger.error('Could not get abundance') sys.exit(1) From 0bbfc03df74ebd42fbd4ac39dfbb460c42ac946f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Mon, 13 Nov 2017 14:55:32 +0100 Subject: [PATCH 7/7] version bump (0.8.1) --- README.md | 2 +- doc/conf.py | 2 +- doc/iss/install.rst | 2 +- iss/version.py | 2 +- setup.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 9578eab..91e7e91 
100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ To install InSilicoSeq, type the following in your terminal: Alternatively, with docker: ```shell -docker pull hadrieng/insilicoseq:0.8.0 +docker pull hadrieng/insilicoseq:0.8.1 ``` ## Usage diff --git a/doc/conf.py b/doc/conf.py index f02ce10..9173566 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -60,7 +60,7 @@ # The short X.Y version. version = '0.8' # The full version, including alpha/beta/rc tags. -release = '0.8.0' +release = '0.8.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/doc/iss/install.rst b/doc/iss/install.rst index 0309b9b..4eb74e7 100644 --- a/doc/iss/install.rst +++ b/doc/iss/install.rst @@ -51,7 +51,7 @@ If you wish to use InSilicoSeq using docker .. code-block:: bash - docker pull hadrieng/insilicoseq:0.8.0 + docker pull hadrieng/insilicoseq:0.8.1 To use InSilicoSeq with docker, you need to provide a `volume` to the ``docker run`` command. Given with the ``-v`` option, the volume is your way diff --git a/iss/version.py b/iss/version.py index 32a90a3..ef72cc0 100644 --- a/iss/version.py +++ b/iss/version.py @@ -1 +1 @@ -__version__ = '0.8.0' +__version__ = '0.8.1' diff --git a/setup.py b/setup.py index 300b9b9..31e25ab 100644 --- a/setup.py +++ b/setup.py @@ -5,12 +5,12 @@ setup( name='InSilicoSeq', - version='0.8.0', + version='0.8.1', description='a sequencing simulator', url='https://github.com/HadrienG/InSilicoSeq', - download_url='https://github.com/HadrienG/InSilicoSeq/tarball/0.8.0', + download_url='https://github.com/HadrienG/InSilicoSeq/tarball/0.8.1', author='Hadrien Gourlé', author_email='hadrien.gourle@slu.se',