nextstrain · rneher · Jul 10, 2019 · May 9, 2019 · May 13, 2019 · jameshadfield
diff --git a/augur/ancestral.py b/augur/ancestral.py
@@ -70,6 +70,16 @@ def run(args):
         print("ERROR: reading tree from %s failed."%args.tree)
         return 1
 
+    import numpy as np
+    missing_internal_node_names = [n.name is None for n in T.get_nonterminals()]
+    if np.all(missing_internal_node_names):
+        print("\n*** WARNING: Tree has no internal node names!")
+        print("*** Without internal node names, ancestral sequences can't be linked up to the correct node later.")
+        print("*** If you want to use 'augur export' or `augur translate` later, re-run this command with the output of 'augur refine'.")
+        print("*** If you haven't run 'augur refine', you can add node names to your tree by running:")
+        print("*** augur refine --tree %s --output-tree <filename>.nwk"%(args.tree) )
+        print("*** And use <filename>.nwk as the tree when running 'ancestral', 'translate', and 'traits'")
+
     if any([args.alignment.lower().endswith(x) for x in ['.vcf', '.vcf.gz']]):
         if not args.vcf_reference:
             print("ERROR: a reference Fasta is required with VCF-format alignments")

diff --git a/augur/export.py b/augur/export.py
@@ -53,6 +53,8 @@ def convert_tree_to_json_structure(node, metadata, div=0, nextflu_schema=False,
                 cdiv = div + metadata[child.name]['mutation_length']
             elif 'branch_length' in metadata[child.name]:
                 cdiv = div + metadata[child.name]['branch_length']
+            else:
+                print("ERROR: Cannot find branch length information for %s"%(child.name))
             node_struct["children"].append(convert_tree_to_json_structure(child, metadata, div=cdiv, nextflu_schema=nextflu_schema, strains=strains)[0])
 
     return (node_struct, strains)
@@ -77,7 +79,8 @@ def recursively_decorate_tree_json_nextflu_schema(node, node_metadata, decoratio
         metadata = node_metadata[node["strain"]]
         metadata["strain"] = node["strain"]
     except KeyError:
-        raise Exception("ERROR: node %s is not found in the node metadata."%n.name)
+        # `node` is indexed by key in the try block above, so report the strain the same way.
+        raise Exception("ERROR: node %s is not found in the node metadata."%node["strain"])
 
     for data in decorations:
         val = None

diff --git a/augur/refine.py b/augur/refine.py
@@ -128,6 +128,7 @@ def run(args):
     attributes = ['branch_length']
 
     # check if tree is provided an can be read
+    T = None #otherwise get 'referenced before assignment' error if reading fails
     for fmt in ["newick", "nexus"]:
         try:
             T = Phylo.read(args.tree, fmt)
@@ -165,8 +166,10 @@ def run(args):
     # if not specified, construct default output file name with suffix _tt.nwk
     if args.output_tree:
         tree_fname = args.output_tree
-    else:
+    elif args.alignment:
         tree_fname = '.'.join(args.alignment.split('.')[:-1]) + '_tt.nwk'
+    else:
+        tree_fname = '.'.join(args.tree.split('.')[:-1]) + '_tt.nwk'
 
     if args.timetree:
         # load meta data and covert dates to numeric
@@ -215,10 +218,13 @@ def run(args):
     import json
     tree_success = Phylo.write(T, tree_fname, 'newick', format_branch_length='%1.8f')
     print("updated tree written to",tree_fname, file=sys.stdout)
+
     if args.output_node_data:
         node_data_fname = args.output_node_data
-    else:
+    elif args.alignment:
         node_data_fname = '.'.join(args.alignment.split('.')[:-1]) + '.node_data.json'
+    else:
+        node_data_fname = '.'.join(args.tree.split('.')[:-1]) + '.node_data.json'
 
     write_json(node_data, node_data_fname)
     print("node attributes written to",node_data_fname, file=sys.stdout)

diff --git a/augur/traits.py b/augur/traits.py
@@ -165,6 +165,17 @@ def run(args):
     tree_fname = args.tree
     traits, columns = read_metadata(args.metadata)
 
+    from Bio import Phylo
+    T = Phylo.read(tree_fname, 'newick')
+    missing_internal_node_names = [n.name is None for n in T.get_nonterminals()]
+    if np.all(missing_internal_node_names):
+        print("\n*** WARNING: Tree has no internal node names!")
+        print("*** Without internal node names, ancestral traits can't be linked up to the correct node later.")
+        print("*** If you want to use 'augur export' later, re-run this command with the output of 'augur refine'.")
+        print("*** If you haven't run 'augur refine', you can add node names to your tree by running:")
+        print("*** augur refine --tree %s --output-tree <filename>.nwk"%(tree_fname) )
+        print("*** And use <filename>.nwk as the tree when running 'ancestral', 'translate', and 'traits'")
+
     mugration_states = defaultdict(dict)
     for column in args.columns:
         T, gtr, alphabet = mugration_inference(tree=tree_fname, seq_meta=traits,

diff --git a/augur/translate.py b/augur/translate.py
@@ -8,6 +8,12 @@
 from .utils import read_node_data, load_features, write_json, write_VCF_translation
 from treetime.vcf_utils import read_vcf
 
+class MissingNodeError(Exception):
+    """Raised when a tree node that needs a name has none (its `name` is None)."""
+
+class MismatchNodeError(Exception):
+    """Raised when a tree node's name is absent from the translated sequences."""
+
 def safe_translate(sequence, report_exceptions=False):
     """Returns an amino acid translation of the given nucleotide sequence accounting
     for gaps in the given sequence.
@@ -181,7 +187,15 @@ def assign_aa_vcf(tree, translations):
     #get mutations on the root
     root = tree.root
     aa_muts[root.name]={"aa_muts":{}}
+    #If has no root node name, exit with error
+    if root.name is None:
+        print("\n*** Can't find node name for the tree root!")
+        raise MissingNodeError()
+
     for fname, prot in translations.items():
+        if root.name not in prot['sequences']:
+            print("\n*** Can't find %s in the alignment provided!"%(root.name))
+            raise MismatchNodeError()
         root_muts = prot['sequences'][root.name]
         tmp = []
         for pos in prot['positions']:
@@ -193,9 +207,15 @@ def assign_aa_vcf(tree, translations):
         for c in n:
             aa_muts[c.name]={"aa_muts":{}}
         for fname, prot in translations.items():
+            if n.name not in prot['sequences']:
+                print("\n*** Can't find %s in the alignment provided!"%(n.name))
+                raise MismatchNodeError()
             n_muts = prot['sequences'][n.name]
             for c in n:
                 tmp = []
+                if c.name is None:
+                    print("\n*** Internal node missing a node name!")
+                    raise MissingNodeError()
                 c_muts = prot['sequences'][c.name]
                 for pos in prot['positions']:
                     #if pos in both, check if same
@@ -211,6 +231,39 @@ def assign_aa_vcf(tree, translations):
 
     return aa_muts
 
+def assign_aa_fasta(tree, translations):
+    """Collect the amino acid mutations on every branch from per-node translations.
+
+    `translations` is iterated as (name, alignment) pairs; each alignment is
+    looked up by node name. Returns {node name: {"aa_muts": {name: [mutations]}}},
+    with "aa_sequences" added to the entry of the node equal to `tree.root`.
+    Raises MissingNodeError for an unnamed internal node and MismatchNodeError
+    when neither node of a parent/child pair is in an alignment.
+    """
+    internal_nodes = tree.get_nonterminals()
+    # fasta input shouldn't have mutations on root, so give empty entry
+    aa_muts = {internal_nodes[0].name: {"aa_muts": {}}}
+    for parent in internal_nodes:
+        if parent.name is None:
+            print("\n*** Tree is missing node names!")
+            raise MissingNodeError()
+        aa_muts.update({child.name: {"aa_muts": {}} for child in parent})
+        for gene, aln in translations.items():
+            for child in parent:
+                found = (child.name in aln, parent.name in aln)
+                if not any(found):
+                    print("\n*** Can't find %s OR %s in the alignment provided!"%(child.name, parent.name))
+                    raise MismatchNodeError()
+                if not all(found):
+                    print("no sequence pair for nodes %s-%s"%(child.name, parent.name))
+                    continue
+                aa_muts[child.name]["aa_muts"][gene] = [construct_mut(a, pos, d) for pos, (a, d)
+                        in enumerate(zip(aln[parent.name], aln[child.name]), start=1) if a != d]
+        if parent == tree.root:
+            aa_muts[parent.name] = {"aa_muts": {}, "aa_sequences": {gene: "".join(aln[parent.name])
+                    for gene, aln in translations.items() if parent.name in aln}}
+    return aa_muts
+
 def get_genes_from_file(fname):
     genes = []
     if os.path.isfile(fname):
@@ -314,32 +367,23 @@ def run(args):
                               'strand': 1}
 
     ## determine amino acid mutations for each node
-    if is_vcf:
-        aa_muts = assign_aa_vcf(tree, translations)
-    else:
-        aa_muts = {}
-
-        #fasta input shouldn't have mutations on root, so give empty entry
-        root = tree.get_nonterminals()[0]
-        aa_muts[root.name]={"aa_muts":{}}
-
-        for n in tree.get_nonterminals():
-            for c in n:
-                aa_muts[c.name]={"aa_muts":{}}
-            for fname, aln in translations.items():
-                for c in n:
-                    if c.name in aln and n.name in aln:
-                        tmp = [construct_mut(a, int(pos+1), d) for pos, (a,d) in
-                                enumerate(zip(aln[n.name], aln[c.name])) if a!=d]
-                        aa_muts[c.name]["aa_muts"][fname] = tmp
-                    else:
-                        print("no sequence pair for nodes %s-%s"%(c.name, n.name))
-            if n==tree.root:
-                aa_muts[n.name]={"aa_muts":{}, "aa_sequences":{}}
-                for fname, aln in translations.items():
-                    if n.name in aln:
-                        aa_muts[n.name]["aa_sequences"][fname] = "".join(aln[n.name])
-
+    try:
+        if is_vcf:
+            aa_muts = assign_aa_vcf(tree, translations)
+        else:
+            aa_muts = assign_aa_fasta(tree, translations)
+    except MissingNodeError as err:
+        print("\n*** ERROR: Some/all nodes have no node names!") 
+        print("*** Please check you are providing the tree output by 'augur refine'.")
+        print("*** If you haven't run 'augur refine', please add node names to your tree by running:")
+        print("*** augur refine --tree %s --output-tree <filename>.nwk"%(args.tree) )
+        print("*** And use <filename>.nwk as the tree when running 'ancestral', 'translate', and 'traits'")
+        return 1
+    except MismatchNodeError as err:
+        print("\n*** ERROR: Mismatch between node names in %s and in %s"%(args.tree, args.ancestral_sequences))
+        print("*** Ensure you are using the same tree you used to run 'ancestral' as input here.")
+        print("*** Or, re-run 'ancestral' using %s, then use the new %s as input here."%(args.tree, args.ancestral_sequences))
+        return 1
 
     write_json({'annotations':annotations, 'nodes':aa_muts}, args.output)
     print("amino acid mutations written to",args.output, file=sys.stdout)