Skip to content

Commit

Permalink
Merge pull request #621 from nextstrain/improve-validate
Browse files Browse the repository at this point in the history
Improve validation of Auspice JSONs
  • Loading branch information
huddlej authored Oct 21, 2020
2 parents 295ad4f + 1cd2177 commit e209103
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 2 deletions.
4 changes: 2 additions & 2 deletions augur/data/schema-export-v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -445,9 +445,9 @@
},
"children": {
"description": "Child nodes. Recursive structure. Terminal nodes do not have this property.",
"$comment": "Polytomies (more than 2 items) allowed.",
"$comment": "Polytomies (more than 2 items) allowed, as are nodes with a single child.",
"type": "array",
"minItems": 2,
"minItems": 1,
"items": {"$ref": "#/properties/tree"}
}
}
Expand Down
16 changes: 16 additions & 0 deletions augur/validate_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@
import sys
from collections import defaultdict

def ensure_no_duplicate_names(root, ValidateError):
    """
    Check that all node names are unique, which is required for auspice (v2) JSONs.

    Parameters
    ----------
    root : dict
        Root node of the tree. Every node must have a "name" key and may have
        a "children" list holding nodes of the same structure.
    ValidateError : type
        Exception class to raise when a node name is seen more than once.

    Raises
    ------
    ValidateError
        On the first repeated name met in a pre-order walk of the tree
        (parent before children, children in list order).
    """
    names = set()
    # Walk with an explicit stack instead of recursion so that very deep
    # trees cannot exceed Python's recursion limit.
    stack = [root]
    while stack:
        node = stack.pop()
        name = node["name"]
        if name in names:
            raise ValidateError(f"Node {name} appears multiple times in the tree.")
        names.add(name)
        if "children" in node:
            # Push in reverse so children are popped in their original order.
            stack.extend(reversed(node["children"]))


def collectTreeAttrsV2(root, warn):
"""
Collect all keys specified on `node["node_attrs"]` throughout the tree
Expand Down Expand Up @@ -82,6 +96,8 @@ def warn(msg):

print("Validating that the JSON is internally consistent...")

ensure_no_duplicate_names(data["tree"], ValidateError)

if "entropy" in data["meta"]["panels"] and "genome_annotations" not in data["meta"]:
warn("The entropy panel has been specified but annotations don't exist.")

Expand Down
30 changes: 30 additions & 0 deletions tests/test_validate_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import Bio.Phylo
from io import StringIO
from pathlib import Path
import pytest
import sys

# we assume (but do not check) that this script lives in the tests/ directory
sys.path.append(str(Path(__file__).parent.parent.parent))

from augur.export_v2 import convert_tree_to_json_structure
from augur.validate import ValidateError
from augur.validate_export import ensure_no_duplicate_names


class TestValidateExport:
    """Exercise ``ensure_no_duplicate_names`` on trees converted to JSON structures."""

    def test_export_without_duplicate_names(self):
        """A tree whose node names are all distinct must validate without raising."""
        newick = StringIO("root(A, internal(B, C))")
        phylo_tree = Bio.Phylo.read(newick, "newick")
        node_metadata = {name: {} for name in ("A", "B", "C", "root", "internal")}
        json_root = convert_tree_to_json_structure(phylo_tree.root, node_metadata)

        ensure_no_duplicate_names(json_root, ValidateError)

    def test_export_with_duplicate_names(self):
        """A tree that repeats a tip name must be rejected with ``ValidateError``."""
        newick = StringIO("root(A, internal(B, B))")
        phylo_tree = Bio.Phylo.read(newick, "newick")
        node_metadata = {name: {} for name in ("A", "B", "root", "internal")}
        json_root = convert_tree_to_json_structure(phylo_tree.root, node_metadata)

        with pytest.raises(ValidateError):
            ensure_no_duplicate_names(json_root, ValidateError)

0 comments on commit e209103

Please sign in to comment.