Skip to content

Commit

Permalink
Merge pull request #310 from nextstrain/read-trees-safely
Browse files Browse the repository at this point in the history
Refactor logic to read trees from multiple formats into a function
  • Loading branch information
rneher authored Jul 10, 2019
2 parents 9c4ed81 + ff90853 commit 2bda188
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 23 deletions.
16 changes: 6 additions & 10 deletions augur/ancestral.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from Bio import Phylo, SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from .utils import write_json
from .utils import read_tree, InvalidTreeError, write_json
from treetime.vcf_utils import read_vcf, write_vcf
from collections import defaultdict

Expand Down Expand Up @@ -106,15 +106,11 @@ def run(args):
is_vcf = False
ref = None
anc_seqs = {}
# check if tree is provided and can be read
for fmt in ["newick", "nexus"]:
try:
T = Phylo.read(args.tree, fmt)
break
except:
pass
if T is None:
print("ERROR: reading tree from %s failed."%args.tree)

try:
T = read_tree(args.tree)
except (FileNotFoundError, InvalidTreeError) as error:
print("ERROR: %s" % error, file=sys.stderr)
return 1

import numpy as np
Expand Down
20 changes: 7 additions & 13 deletions augur/refine.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

import os, shutil, time, sys
from Bio import Phylo
from .utils import read_metadata, get_numerical_dates, write_json
from .utils import read_metadata, read_tree, get_numerical_dates, write_json, InvalidTreeError
from treetime.vcf_utils import read_vcf, write_vcf


def refine(tree=None, aln=None, ref=None, dates=None, branch_length_inference='auto',
confidence=False, resolve_polytomies=True, max_iter=2,
infer_gtr=True, Tc=0.01, reroot=None, use_marginal=False, fixed_pi=None,
Expand Down Expand Up @@ -127,18 +128,11 @@ def run(args):
# list of node attributes that are to be exported, will grow
attributes = ['branch_length']

# check if tree is provided and can be read
T = None #otherwise get 'referenced before assignment' error if reading fails
for fmt in ["newick", "nexus"]:
try:
T = Phylo.read(args.tree, fmt)
node_data['input_tree'] = args.tree
break
except Exception as error:
print("\n\nERROR: reading tree from %s failed: %s" % (args.tree, error))
return 1
if T is None:
print("\n\nERROR: reading tree from %s failed."%args.tree)
try:
T = read_tree(args.tree)
node_data['input_tree'] = args.tree
except (FileNotFoundError, InvalidTreeError) as error:
print("ERROR: %s" % error, file=sys.stderr)
return 1

if not args.alignment:
Expand Down
58 changes: 58 additions & 0 deletions augur/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,64 @@ def get_numerical_dates(meta_dict, name_col = None, date_col='date', fmt=None, m

return numerical_dates


class InvalidTreeError(Exception):
    """Signal that a phylogenetic tree could not be loaded from a file name.

    Carries no state beyond what ``Exception`` provides; the message passed
    at raise time describes the failure.
    """


def read_tree(fname, min_terminals=3):
    """Load a phylogenetic tree from a file, trying each supported format.

    The file is parsed as Newick first and then as Nexus. The first parse
    that yields a tree with at least ``min_terminals`` terminals wins.

    Parameters
    ----------
    fname : str
        name of a file containing a phylogenetic tree
    min_terminals : int
        smallest number of terminals a parsed tree must have to be accepted;
        used as a sanity check on the parse result

    Returns
    -------
    Bio.Phylo :
        BioPython tree instance

    Raises
    ------
    InvalidTreeError
        If none of the supported formats produces an acceptable tree.

    Notes
    -----
    Only ``ValueError`` is treated as "wrong format, try the next one". Any
    other exception raised while reading (callers in this package handle
    ``FileNotFoundError``, for example) propagates unchanged.
    """
    supported_tree_formats = ["newick", "nexus"]

    for tree_format in supported_tree_formats:
        try:
            candidate = Bio.Phylo.read(fname, tree_format)

            # BioPython can "successfully" parse non-tree data, so a parse
            # result with too few terminals is rejected as invalid input and
            # the next format is tried.
            if candidate.count_terminals() >= min_terminals:
                return candidate
        except ValueError:
            # The file cannot be read in this format; move on to the next.
            continue

    # Every supported format was tried and none gave a usable tree.
    formats_label = ", ".join(supported_tree_formats)
    raise InvalidTreeError(
        "Could not read the given tree %s using the following supported formats: %s" % (fname, formats_label)
    )


def read_node_data(fnames, tree=None):
"""parse the "nodes" field of the given JSONs and join the data together"""
if type(fnames) is str:
Expand Down

0 comments on commit 2bda188

Please sign in to comment.