From 0192c79a2fd51f87ed5a84d6fc1ffe672aceb704 Mon Sep 17 00:00:00 2001 From: rfm-targa Date: Mon, 15 Jul 2024 20:28:49 +0100 Subject: [PATCH] Fixed issue related to CDS counting when it is not possible to predict CDSs for one or more inputs. --- CHEWBBACA/AlleleCall/allele_call.py | 6 +++--- CHEWBBACA/utils/constants.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHEWBBACA/AlleleCall/allele_call.py b/CHEWBBACA/AlleleCall/allele_call.py index 188d3fb3..5c1d95e5 100644 --- a/CHEWBBACA/AlleleCall/allele_call.py +++ b/CHEWBBACA/AlleleCall/allele_call.py @@ -1990,9 +1990,9 @@ def allele_calling(fasta_files, schema_directory, temp_directory, print('Make sure that Pyrodigal runs in meta mode (--pm meta) ' 'if any input file has less than 100kbp.') if len(cds_fastas) == 0: - sys.exit(f'\n{ct.CANNOT_PREDICT}') + sys.exit(f'{ct.CANNOT_PREDICT}') - print(f'\nExtracted a total of {total_extracted} CDSs from {len(fasta_files)} inputs.') + print(f'\nExtracted a total of {total_extracted} CDSs from {len(fasta_files)-len(failed)} inputs.') # Inputs are Fasta files with the predicted CDSs else: # Rename the CDSs in each file based on the input unique identifiers @@ -2042,7 +2042,7 @@ def allele_calling(fasta_files, schema_directory, temp_directory, template_dict['int_to_unique'] = int_to_unique # Change to unique integer identifiers - cds_counts = {unique_to_int[k]: v for k, v in cds_counts.items()} + cds_counts = {unique_to_int[k]: v for k, v in cds_counts.items() if k in unique_to_int} template_dict['cds_counts'] = cds_counts # Concatenate subgroups of FASTA files before deduplication diff --git a/CHEWBBACA/utils/constants.py b/CHEWBBACA/utils/constants.py index e6d31a4e..61c79950 100755 --- a/CHEWBBACA/utils/constants.py +++ b/CHEWBBACA/utils/constants.py @@ -528,7 +528,7 @@ # e.g. files only contain sequence headers, contain invalid # sequences/chars or pyrodigal cannot predict any genes CANNOT_PREDICT = ('Could not predict CDSs from any of the input files.' - '\nPlease provide input files in the accepted FASTA format.') + '\nPlease verify the format of the input files.') INVALID_BSR = ('\nBSR value is not contained in the [0.0, 1.0] interval.') INVALID_BSR_TYPE = ('\nInvalid BSR value of {0}. BSR value must be contained in the [0.0, 1.0] interval.')