Updated docstrings.

B-UMMI · Apr 12, 2024 · 25b1a08 · 25b1a08
1 parent 6fdcee5
commit 25b1a08
Show file tree

Hide file tree

Showing 30 changed files with 395 additions and 575 deletions.
diff --git a/CHEWBBACA/AlleleCall/allele_call.py b/CHEWBBACA/AlleleCall/allele_call.py
@@ -830,7 +830,7 @@ def write_results_alleles(classification_files, input_identifiers,
 
 def write_results_statistics(classification_files, input_identifiers,
                              cds_counts, output_directory, classification_labels,
-                             repeated_counts, invalid_data, loci_finder):
+                             repeated_counts, invalid_data):
     """Write a TSV file with classification counts per input.
 
     Parameters
@@ -856,9 +856,6 @@ def write_results_statistics(classification_files, input_identifiers,
     invalid_data : dict
         Dictionary with input identifiers as keys and the total
         number of invalid CDSs as values.
-	loci_finder : re.Pattern
-		Regular expression object to search for loci identifiers
-		in paths and filenames.
 
     Returns
     -------
@@ -870,7 +867,6 @@ def write_results_statistics(classification_files, input_identifiers,
     class_counts = {i: {c: 0 for c in classification_labels}
                     for i in input_identifiers}
     for file in classification_files.values():
-        locus_id = loci_finder.search(file).group()
         locus_results = fo.pickle_loader(file)
 
         for i in class_counts:
@@ -1361,7 +1357,7 @@ def process_blast_results(blast_results, bsr_threshold, query_scores):
             bsr = cf.compute_bsr(raw_score, query_scores[query_id][1])
         except Exception as e:
             print('Could not get the self-score for the representative '
-                  f'allele {query_id}')
+                  f'allele {query_id}', e)
             continue
         # Only keep matches above BSR threshold
         if bsr >= bsr_threshold:
@@ -1563,7 +1559,7 @@ def classify_inexact_matches(locus, genomes_matches, inv_map,
                 int(rep_alleleid.replace('*', '').split('_')[-1])
                 rep_alleleid = rep_alleleid.split('_')[-1]
             except Exception as e:
-                pass
+                print(e)
 
             # Get hash of the CDS DNA sequence
             target_dna_hash = match[2]
@@ -2952,8 +2948,7 @@ def main(input_file, loci_list, schema_directory, output_directory,
                                                 output_directory,
                                                 classification_labels,
                                                 repeated_counts,
-                                                results['invalid_alleles'],
-												loci_finder)
+                                                results['invalid_alleles'])
 
     # Create file with class counts per locus called
     print(f'Creating file with class counts per locus ({ct.LOCI_STATS_BASENAME})...')

diff --git a/CHEWBBACA/AlleleCallEvaluator/evaluate_calls.py b/CHEWBBACA/AlleleCallEvaluator/evaluate_calls.py
@@ -214,7 +214,7 @@ def concatenate_loci_alignments(sample, loci, fasta_index, output_directory):
         try:
             alignment += str(fasta_index[seqid].seq)
         except Exception as e:
-            print(f'Could not get {sample} allele for locus {locus}.')
+            print(f'Could not get {sample} allele for locus {locus}.', e)
     # Save alignment for sample
     alignment_outfile = fo.join_paths(output_directory,
                                       [f'{sample}_cgMLST_alignment.fasta'])
@@ -322,8 +322,6 @@ def main(input_files, schema_directory, output_directory, annotations,
     summary_rows = [total_samples, total_loci, total_cds,
                     loci_sums[-1], *loci_sums[:-1]]
 
-    pa_lines = []
-    dm_lines = []
     phylo_data = {"phylo_data": []}
     if light is False:
         if False in [no_pa, no_dm, no_tree] or cg_alignment is True:

diff --git a/CHEWBBACA/CHEWBBACA_NS/README.md b/CHEWBBACA/CHEWBBACA_NS/README.md
diff --git a/CHEWBBACA/CHEWBBACA_NS/download_schema.py b/CHEWBBACA/CHEWBBACA_NS/download_schema.py
@@ -4,11 +4,9 @@
 Purpose
 -------
 
-This module enables the download of chewBBACA's schemas from the
-Chewie-NS.
-
+This module enables the download of schemas from a Chewie-NS instance.
 The process enables the download of ZIP archives that contain ready-to-use
-versions of any schema in the Chewie-NS. It also allows users to download
+versions of any schema in Chewie-NS. It also allows users to download
 any schema with the structure it had at a specific time point. It is also
 possible to download the latest version of the schema through requests to
 the Chewie-NS API, if the compressed version that is available does not

diff --git a/CHEWBBACA/CHEWBBACA_NS/stats_requests.py b/CHEWBBACA/CHEWBBACA_NS/stats_requests.py
@@ -4,64 +4,28 @@
 Purpose
 -------
 
-This module enables the retrieval of information/stats from the
-Chewie-NS. Its main objective is to provide information about
-the list of species and schemas in the Chewie-NS, so that users
+This module enables the retrieval of information/stats from a
+Chewie-NS instance. Its main objective is to provide information about
+the list of species and schemas in Chewie-NS, so that users
 can quickly identify a schema of interest and download it (this
 process generates tables with species and schemas identifiers that
 can be passed to the `-sc` and `-sp` arguments of DownloadSchema).
 
-Expected input
---------------
-
-The process expects the following variables whether through command line
-execution or invocation of the :py:func:`main` function:
-
-- ``-m``, ``stats_mode`` : The process can retrieve the list of species
-  ("species" option) in the Chewie-NS, the list of schemas for a species
-  ("schemas" option and valid value for `--sp`) or information about a
-  single schema ("schemas" option and valid values for `--sp` and `--sc`).
-
-    - e.g.: ``species`` or ``schemas``
-
-- ``--ns_url``, ``nomenclature_server_url`` : The base URL for the Nomenclature Server.
-  The default value, "main", will establish a connection to "https://chewbbaca.online/",
-  "tutorial" to "https://tutorial.chewbbaca.online/"" and "local" to
-  "http://127.0.0.1:5000/NS/api/" (localhost). Users may also provide the IP address to
-  other Chewie-NS instances.
-
-    - e.g.: ``http://127.0.0.1:5000/NS/api/`` (localhost)
-
-- ``--sp``, ``species_id`` : The integer identifier of a species
-  in the Chewie-NS. The process will retrieve the list of schemas
-  for the species with specified identifier.
-
-    - e.g.: ``2``
-
-- ``--sc``, ``schema_id`` : The integer identifier of a schema in
-  the Chewie-NS. The process will retrieve information about the
-  schema with specified identifier.
-
-    - e.g.: ``4``
-
 Code documentation
 ------------------
 """
 
 
 import sys
 import requests
-import argparse
 from urllib3.exceptions import InsecureRequestWarning
 
 try:
     from utils import (constants as ct,
-                       chewiens_requests as cr,
-                       parameters_validation as pv)
+                       chewiens_requests as cr)
 except ModuleNotFoundError:
     from CHEWBBACA.utils import (constants as ct,
-                                 chewiens_requests as cr,
-                                 parameters_validation as pv)
+                                 chewiens_requests as cr)
 
 
 # Suppress only the single warning from urllib3 needed.
@@ -384,7 +348,23 @@ def single_schema(species_id, schema_id, base_url, headers_get):
 
 
 def main(mode, nomenclature_server, species_id, schema_id):
+    """Get species and schema statistics from a Chewie-NS instance.
 
+    Parameters
+    ----------
+    mode : str
+        The process can retrieve the list of species ("species" option)
+        from Chewie-NS, the list of schemas for a species ("schemas"
+        option and valid value for `species_id`) or information about a
+        single schema ("schemas" option and valid values for `species_id`
+        and `schema_id`).
+    nomenclature_server : str
+        The base URL for the Chewie-NS instance.
+    species_id : int
+        The integer identifier of a species in Chewie-NS.
+    schema_id : int
+        The integer identifier of a schema in Chewie-NS.
+    """
     headers_get = ct.HEADERS_GET_JSON
 
     print('\nRetrieving data...')
@@ -402,50 +382,6 @@ def main(mode, nomenclature_server, species_id, schema_id):
             sys.exit('\nPlease provide a valid species identifier '
                      'to get the list of available schemas.\n')
 
-    # print stats
+    # Print stats
     stats_text = '\n'.join(stats)
     print('\n{0}\n'.format(stats_text))
-
-
-def parse_arguments():
-
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=argparse.RawDescriptionHelpFormatter)
-
-    parser.add_argument('-m', type=str, required=True,
-                        dest='stats_mode', choices=['species', 'schemas'],
-                        help='The process can retrieve the list of species '
-                             '("species" option) in the Chewie-NS or the '
-                             'list of schemas for a species '
-                             '("schemas" option).')
-
-    parser.add_argument('--ns', type=pv.validate_ns_url, required=False,
-                        default='main',
-                        dest='nomenclature_server',
-                        help='The base URL for the Nomenclature Server. '
-                             'The default value, "main", will establish a '
-                             'connection to "https://chewbbaca.online/", '
-                             '"tutorial" to "https://tutorial.chewbbaca.online/" '
-                             'and "local" to "http://127.0.0.1:5000/NS/api/" (localhost). '
-                             'Users may also provide the IP address to other '
-                             'Chewie-NS instances.')
-
-    parser.add_argument('--sp', type=str, required=False,
-                        dest='species_id', default=None,
-                        help='The integer identifier of a '
-                             'species in the Chewie-NS.')
-
-    parser.add_argument('--sc', type=str, required=False,
-                        dest='schema_id', default=None,
-                        help='The integer identifier of a schema in '
-                             'the Chewie-NS.')
-
-    args = parser.parse_args()
-
-    return args
-
-
-if __name__ == '__main__':
-
-    args = parse_arguments()
-    main(**(vars(args)))