Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve validation of Auspice JSONs #621

Merged
merged 3 commits into from
Oct 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions augur/data/schema-export-v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -445,9 +445,9 @@
},
"children": {
"description": "Child nodes. Recursive structure. Terminal nodes do not have this property.",
"$comment": "Polytomies (more than 2 items) allowed.",
"$comment": "Polytomies (more than 2 items) allowed, as are nodes with a single child.",
"type": "array",
"minItems": 2,
"minItems": 1,
"items": {"$ref": "#/properties/tree"}
}
}
Expand Down
16 changes: 16 additions & 0 deletions augur/validate_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@
import sys
from collections import defaultdict

def ensure_no_duplicate_names(root, ValidateError):
    """
    Check that all node names are unique, which is required for auspice (v2) JSONs.

    `root` is the root node (a dict) of the tree. Every node is expected to
    carry a "name" key and, optionally, a "children" list of further nodes.

    `ValidateError` is the exception class to raise when a name is found on
    more than one node. Nothing is returned when all names are unique.
    """
    names = set()
    # Walk the tree with an explicit stack rather than recursion so that very
    # deep trees cannot exhaust Python's recursion limit.
    stack = [root]
    while stack:
        node = stack.pop()
        name = node["name"]
        if name in names:
            raise ValidateError(f"Node {name} appears multiple times in the tree.")
        names.add(name)
        # Children are pushed in reverse so they are popped (visited) in their
        # original order, i.e. the same pre-order a recursive walk would use.
        if "children" in node:
            stack.extend(reversed(node["children"]))


def collectTreeAttrsV2(root, warn):
"""
Collect all keys specified on `node["node_attrs"]` throughout the tree
Expand Down Expand Up @@ -82,6 +96,8 @@ def warn(msg):

print("Validating that the JSON is internally consistent...")

ensure_no_duplicate_names(data["tree"], ValidateError)

if "entropy" in data["meta"]["panels"] and "genome_annotations" not in data["meta"]:
warn("The entropy panel has been specified but annotations don't exist.")

Expand Down
30 changes: 30 additions & 0 deletions tests/test_validate_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import Bio.Phylo
from io import StringIO
from pathlib import Path
import pytest
import sys

# We assume (but do not actually assert) that this file lives in the tests/ directory.
sys.path.append(str(Path(__file__).parent.parent.parent))

from augur.export_v2 import convert_tree_to_json_structure
from augur.validate import ValidateError
from augur.validate_export import ensure_no_duplicate_names


class TestValidateExport:
    """Exercise ensure_no_duplicate_names on trees parsed from newick strings."""

    def test_export_without_duplicate_names(self):
        """A tree in which every name is distinct must validate without raising."""
        newick = StringIO("root(A, internal(B, C))")
        phylo_tree = Bio.Phylo.read(newick, "newick")
        node_metadata = {key: {} for key in ("A", "B", "C", "root", "internal")}
        json_root = convert_tree_to_json_structure(phylo_tree.root, node_metadata)
        ensure_no_duplicate_names(json_root, ValidateError)

    def test_export_with_duplicate_names(self):
        """A tree in which the tip name B occurs twice must raise ValidateError."""
        newick = StringIO("root(A, internal(B, B))")
        phylo_tree = Bio.Phylo.read(newick, "newick")
        node_metadata = {key: {} for key in ("A", "B", "root", "internal")}
        json_root = convert_tree_to_json_structure(phylo_tree.root, node_metadata)

        with pytest.raises(ValidateError):
            ensure_no_duplicate_names(json_root, ValidateError)