From 7427c2475b0c0644d02d73c2c03d2520c6e570e0 Mon Sep 17 00:00:00 2001 From: Thijs Date: Fri, 2 Aug 2024 11:47:43 +0200 Subject: [PATCH 1/2] Convert coverage file to read_count dict --- iss/abundance.py | 17 +++++++++++++++++ iss/generator.py | 6 ++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/iss/abundance.py b/iss/abundance.py index 71fc93d..d066727 100644 --- a/iss/abundance.py +++ b/iss/abundance.py @@ -315,3 +315,20 @@ def expand_draft_abundance(abundance_dic, draft, mode="abundance"): elif mode == "coverage": draft_dic[record.id] = abundance return draft_dic + + +def to_readcount(coverage_dic, read_length, genome_file): + logger = logging.getLogger(__name__) + readcount_dic = {} + f = open(genome_file, "r") # re-opens the file + with f: + fasta_file = SeqIO.parse(f, "fasta") + for record in fasta_file: + try: + record_coverage = coverage_dic[record.id] + except KeyError as e: + logger.warning("Fasta record not found in coverage file: %s" % e) + continue + n_reads = int((record_coverage * len(record.seq)) / read_length) + readcount_dic[record.id] = n_reads + return readcount_dic diff --git a/iss/generator.py b/iss/generator.py index 70033f5..29bb70f 100644 --- a/iss/generator.py +++ b/iss/generator.py @@ -555,9 +555,11 @@ def load_readcount_or_abundance( coverage_dic = abundance.parse_abundance_file(coverage_file) complete_genomes_dic = {k: v for k, v in coverage_dic.items() if k not in draft} draft_dic = abundance.expand_draft_abundance(coverage_dic, draft, mode="coverage") - abundance_dic = {**complete_genomes_dic, **draft_dic} + coverage_dic = {**complete_genomes_dic, **draft_dic} else: - abundance_dic = abundance.parse_abundance_file(coverage_file) + coverage_dic = abundance.parse_abundance_file(coverage_file) + # generate new n_reads from coverage + readcount_dic = abundance.to_readcount(coverage_dic, error_model.read_length, genome_file) elif coverage in abundance_dispatch: # todo coverage distribution with --draft logger.info("Using %s coverage distribution" % coverage) From 82303948543b0ea68c2d843ebf532b54ec6ac251 Mon Sep 17 00:00:00 2001 From: Thijs Date: Fri, 2 Aug 2024 11:53:19 +0200 Subject: [PATCH 2/2] comment --- iss/generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iss/generator.py b/iss/generator.py index 29bb70f..9af5f14 100644 --- a/iss/generator.py +++ b/iss/generator.py @@ -558,7 +558,7 @@ def load_readcount_or_abundance( coverage_dic = {**complete_genomes_dic, **draft_dic} else: coverage_dic = abundance.parse_abundance_file(coverage_file) - # generate new n_reads from coverage + # generate n_reads for each genome from coverage readcount_dic = abundance.to_readcount(coverage_dic, error_model.read_length, genome_file) elif coverage in abundance_dispatch: # todo coverage distribution with --draft