diff --git a/augur/ancestral.py b/augur/ancestral.py
index 49aeee986..f573e8faf 100644
--- a/augur/ancestral.py
+++ b/augur/ancestral.py
@@ -6,7 +6,7 @@
 from Bio import Phylo, SeqIO
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
-from .utils import write_json
+from .utils import read_tree, InvalidTreeError, write_json
 from treetime.vcf_utils import read_vcf, write_vcf
 from collections import defaultdict
 
@@ -106,15 +106,11 @@ def run(args):
     is_vcf = False
     ref = None
     anc_seqs = {}
-    # check if tree is provided and can be read
-    for fmt in ["newick", "nexus"]:
-        try:
-            T = Phylo.read(args.tree, fmt)
-            break
-        except:
-            pass
-    if T is None:
-        print("ERROR: reading tree from %s failed."%args.tree)
+
+    try:
+        T = read_tree(args.tree)
+    except (FileNotFoundError, InvalidTreeError) as error:
+        print("ERROR: %s" % error, file=sys.stderr)
         return 1
 
     import numpy as np
diff --git a/augur/refine.py b/augur/refine.py
index 8d4135d03..4f6e16b4f 100644
--- a/augur/refine.py
+++ b/augur/refine.py
@@ -4,9 +4,10 @@
 
 import os, shutil, time, sys
 from Bio import Phylo
-from .utils import read_metadata, get_numerical_dates, write_json
+from .utils import read_metadata, read_tree, get_numerical_dates, write_json, InvalidTreeError
 from treetime.vcf_utils import read_vcf, write_vcf
 
+
 def refine(tree=None, aln=None, ref=None, dates=None, branch_length_inference='auto',
            confidence=False, resolve_polytomies=True, max_iter=2,
            infer_gtr=True, Tc=0.01, reroot=None, use_marginal=False, fixed_pi=None,
@@ -127,18 +128,11 @@ def run(args):
     # list of node attributes that are to be exported, will grow
     attributes = ['branch_length']
 
-    # check if tree is provided an can be read
-    T = None #otherwise get 'referenced before assignment' error if reading fails
-    for fmt in ["newick", "nexus"]:
-        try:
-            T = Phylo.read(args.tree, fmt)
-            node_data['input_tree'] = args.tree
-            break
-        except Exception as error:
-            print("\n\nERROR: reading tree from %s failed: %s" % (args.tree, error))
-            return 1
-    if T is None:
-        print("\n\nERROR: reading tree from %s failed."%args.tree)
+    try:
+        T = read_tree(args.tree)
+        node_data['input_tree'] = args.tree
+    except (FileNotFoundError, InvalidTreeError) as error:
+        print("ERROR: %s" % error, file=sys.stderr)
         return 1
 
     if not args.alignment:
diff --git a/augur/utils.py b/augur/utils.py
index 3a29b11e5..240c332c8 100644
--- a/augur/utils.py
+++ b/augur/utils.py
@@ -93,6 +93,68 @@ def get_numerical_dates(meta_dict, name_col = None, date_col='date', fmt=None, m
 
     return numerical_dates
 
+
+class InvalidTreeError(Exception):
+    """Represents an error loading a phylogenetic tree from a filename.
+    """
+    pass
+
+
+def read_tree(fname, min_terminals=3):
+    """Safely load a tree from a given filename or raise an error if the file does
+    not contain a valid tree.
+
+    Parameters
+    ----------
+    fname : str
+        name of a file containing a phylogenetic tree
+
+    min_terminals : int
+        minimum number of terminals required for the parsed tree as a sanity
+        check on the tree
+
+    Raises
+    ------
+    FileNotFoundError
+        If the given file does not exist.
+
+    InvalidTreeError
+        If the given file exists but does not seem to contain a valid tree format.
+
+    Returns
+    -------
+    Bio.Phylo.BaseTree.Tree :
+        BioPython tree instance
+
+    """
+    T = None
+    supported_tree_formats = ["newick", "nexus"]
+    for fmt in supported_tree_formats:
+        try:
+            T = Bio.Phylo.read(fname, fmt)
+
+            # Check the sanity of the parsed tree to handle cases when non-tree
+            # data are still successfully parsed by BioPython. Too few terminals
+            # in a tree indicates that the input is not valid.
+            if T.count_terminals() < min_terminals:
+                T = None
+            else:
+                break
+        except (ValueError, Bio.Phylo.NewickIO.NewickError, Bio.Nexus.Nexus.NexusError):
+            # We cannot open the tree in the current format, so we will try
+            # another. BioPython's Newick and Nexus parsers raise their own
+            # error classes, which do not derive from ValueError.
+            pass
+
+    # If the tree cannot be loaded, raise an error to that effect.
+    if T is None:
+        raise InvalidTreeError(
+            "Could not read the given tree %s using the following supported formats: %s" % (fname, ", ".join(supported_tree_formats))
+        )
+
+    return T
+
+
 def read_node_data(fnames, tree=None):
     """parse the "nodes" field of the given JSONs and join the data together"""
     if type(fnames) is str: