Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor logic to read trees from multiple formats into a function #310

Merged
merged 1 commit into from
Jul 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions augur/ancestral.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from Bio import Phylo, SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from .utils import write_json
from .utils import read_tree, InvalidTreeError, write_json
from treetime.vcf_utils import read_vcf, write_vcf
from collections import defaultdict

Expand Down Expand Up @@ -106,15 +106,11 @@ def run(args):
is_vcf = False
ref = None
anc_seqs = {}
# check if tree is provided and can be read
for fmt in ["newick", "nexus"]:
try:
T = Phylo.read(args.tree, fmt)
break
except:
pass
if T is None:
print("ERROR: reading tree from %s failed."%args.tree)

try:
T = read_tree(args.tree)
except (FileNotFoundError, InvalidTreeError) as error:
print("ERROR: %s" % error, file=sys.stderr)
return 1

import numpy as np
Expand Down
20 changes: 7 additions & 13 deletions augur/refine.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

import os, shutil, time, sys
from Bio import Phylo
from .utils import read_metadata, get_numerical_dates, write_json
from .utils import read_metadata, read_tree, get_numerical_dates, write_json, InvalidTreeError
from treetime.vcf_utils import read_vcf, write_vcf


def refine(tree=None, aln=None, ref=None, dates=None, branch_length_inference='auto',
confidence=False, resolve_polytomies=True, max_iter=2,
infer_gtr=True, Tc=0.01, reroot=None, use_marginal=False, fixed_pi=None,
Expand Down Expand Up @@ -127,18 +128,11 @@ def run(args):
# list of node attributes that are to be exported, will grow
attributes = ['branch_length']

# check if tree is provided and can be read
T = None #otherwise get 'referenced before assignment' error if reading fails
for fmt in ["newick", "nexus"]:
try:
T = Phylo.read(args.tree, fmt)
node_data['input_tree'] = args.tree
break
except Exception as error:
print("\n\nERROR: reading tree from %s failed: %s" % (args.tree, error))
return 1
if T is None:
print("\n\nERROR: reading tree from %s failed."%args.tree)
try:
T = read_tree(args.tree)
node_data['input_tree'] = args.tree
except (FileNotFoundError, InvalidTreeError) as error:
print("ERROR: %s" % error, file=sys.stderr)
return 1

if not args.alignment:
Expand Down
58 changes: 58 additions & 0 deletions augur/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,64 @@ def get_numerical_dates(meta_dict, name_col = None, date_col='date', fmt=None, m

return numerical_dates


class InvalidTreeError(Exception):
    """Represents an error loading a phylogenetic tree from a filename.

    Raised by ``read_tree`` when none of the supported tree formats yields
    an acceptable tree for the given file.
    """


def read_tree(fname, min_terminals=3):
    """Safely load a tree from a given filename or raise an error if the file
    does not contain a valid tree.

    Each supported format is tried in order ("newick", then "nexus"). The
    first parse that yields at least ``min_terminals`` terminals is returned.

    Parameters
    ----------
    fname : str
        name of a file containing a phylogenetic tree
    min_terminals : int
        minimum number of terminals required for the parsed tree as a sanity
        check on the tree

    Returns
    -------
    tree
        the BioPython tree object returned by ``Bio.Phylo.read``

    Raises
    ------
    InvalidTreeError
        If the given file exists but does not seem to contain a valid tree
        format.

    Notes
    -----
    Only ``ValueError`` is treated as "wrong format, try the next one". Any
    other exception raised while reading propagates to the caller; callers
    are expected to handle e.g. ``FileNotFoundError`` for a missing file.
    """
    T = None
    supported_tree_formats = ["newick", "nexus"]
    for fmt in supported_tree_formats:
        try:
            T = Bio.Phylo.read(fname, fmt)

            # Check the sanity of the parsed tree to handle cases when
            # non-tree data are still successfully parsed by BioPython. Too
            # few terminals in a tree indicates that the input is not valid.
            if T.count_terminals() < min_terminals:
                # Discard the implausible parse so it is never returned, and
                # fall through to the next format.
                T = None
            else:
                break
        except ValueError:
            # We cannot open the tree in the current format, so we will try
            # another.
            pass

    # If the tree cannot be loaded, raise an error to that effect.
    if T is None:
        raise InvalidTreeError(
            "Could not read the given tree %s using the following supported formats: %s"
            % (fname, ", ".join(supported_tree_formats))
        )

    return T


def read_node_data(fnames, tree=None):
"""parse the "nodes" field of the given JSONs and join the data together"""
if type(fnames) is str:
Expand Down