Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Schema updates #727

Merged
merged 3 commits into from
Jun 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions augur/data/schema-auspice-config-v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,48 @@
"description": "Defines how the color scale should be constructed",
"type": "string",
"enum": ["continuous", "ordinal", "categorical", "boolean"]
},
"scale": {
"description": "Provided mapping between trait values & hex values",
"$comment": "NOTE: if supplied here, we will not use information supplied to `augur export` via `--colors` for this coloring.",
"type": "array",
"items": {
"type": "array",
"items": [
{
"type": ["string", "number"],
"description": "For categorical/ordinal scales, this is the (string) value of the trait to associate with the colour. For continuous scales this is the (numeric) value to associate with the colour, and interpolation will be used to span the domain"
},
{"type": "string", "description": "color hex value", "pattern": "^#[0-9A-Fa-f]{6}$"}
]
}
},
"legend": {
"description": "Specify the entries displayed in the legend. This can be used to restrict the entries in the legend for display without otherwise affecting the data viz",
"type": "array",
"items": {
"type": "object",
"required": ["value"],
"properties": {
"value": {
"description": "value to associate with this legend entry. Used to determine colour. For non-continuous scales this also determines the matching between legend items and data.",
"type": ["string", "number"],
"$comment": "Continuous scales must use a numeric value. Other scales can use either."
},
"display": {
"description": "Label to display in the legend. Optional - `value` will be used if this is not provided.",
"type": ["string", "number"]
},
"bounds": {
"description": "(for continuous scales only) provide the lower & upper bounds to match data to this legend entry. Bounds from different legend entries must not overlap. Matching is (a, b] - exclusive of the lower bound, inclusive of the upper.",
"type": "array",
"items": [
{"type": "number", "description": "lower bound"},
{"type": "number", "description": "upper bound"}
]
}
}
}
}
}
}
Expand Down
35 changes: 33 additions & 2 deletions augur/data/schema-export-v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@
"minItems": 1,
"items": {
"type": "object",
"description": "Each object here is an indiviual coloring, which will populate the sidebar dropdown in auspice",
"description": "Each object here is an individual coloring, which will populate the sidebar dropdown in auspice",
"required": ["key", "type"],
"properties": {
"key": {
Expand All @@ -180,14 +180,45 @@
},
"scale": {
"description": "Provided mapping between trait values & hex values",
"$comment": "For continuous scales at least 2 items must be specified",
"type": "array",
"items": {
"type": "array",
"items": [
{"type": "string", "description": "value of trait (should exist on >= 1 nodes)"},
{
"type": ["string", "number"],
"description": "For categorical/ordinal scales, this is the (string) value of the trait to associate with the colour. For continuous scales this is the (numeric) value to associate with the colour, and interpolation will be used to span the domain"
},
{"type": "string", "description": "color hex value", "pattern": "^#[0-9A-Fa-f]{6}$"}
]
}
},
"legend": {
"description": "Specify the entries displayed in the legend. This can be used to restrict the entries in the legend for display without otherwise affecting the data viz",
"type": "array",
"items": {
"type": "object",
"required": ["value"],
"properties": {
"value": {
"description": "value to associate with this legend entry. Used to determine colour. For non-continuous scales this also determines the matching between legend items and data.",
"type": ["string", "number"],
"$comment": "Continuous scales must use a numeric value. Other scales can use either."
},
"display": {
"description": "Label to display in the legend. Optional - `value` will be used if this is not provided.",
"type": ["string", "number"]
},
"bounds": {
"description": "(for continuous scales only) provide the lower & upper bounds to match data to this legend entry. Bounds from different legend entries must not overlap. Matching is (a, b] - exclusive of the lower bound, inclusive of the upper.",
jameshadfield marked this conversation as resolved.
Show resolved Hide resolved
"type": "array",
"items": [
{"type": "number", "description": "lower bound"},
{"type": "number", "description": "upper bound"}
]
}
}
}
}
}
}
Expand Down
69 changes: 65 additions & 4 deletions augur/export_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import time
from collections import defaultdict, deque
import warnings
import numbers
import re
from Bio import Phylo
from .utils import read_metadata, read_node_data, write_json, read_config, read_lat_longs, read_colors
Expand Down Expand Up @@ -211,8 +212,38 @@ def _get_title(key):
return key

def _add_color_scale(coloring):
## consider various sources to find any user-provided scale information
huddlej marked this conversation as resolved.
Show resolved Hide resolved
key = coloring["key"]
if key.lower() in provided_colors:
scale_type = coloring["type"]
if scale_type is "continuous":
## continuous scale information can only come from an auspice config JSON
if config.get(key, {}).get("scale"):
# enforce numeric values (we can't use the schema for this)
provided_scale = [s for s in config[key]['scale'] if isinstance(s[0], numbers.Number)]
if len(provided_scale)<2:
warn(f"The scale provided for {key} had fewer than 2 (valid numeric) entries. Skipping.")
return coloring
coloring["scale"] = provided_scale
return coloring
elif config.get(key, {}).get("scale"):
# If the auspice config JSON (`config`) explicitly defines a scale for this coloring
# then we use this instead of any colors provided via a TSV file (`provided_colors`)
values_in_tree = get_values_across_nodes(node_attrs, key)
jameshadfield marked this conversation as resolved.
Show resolved Hide resolved
scale = []
provided_values_unseen_in_tree = []
for info in config[key]['scale']:
if info[0] in values_in_tree:
scale.append(info)
else:
provided_values_unseen_in_tree.append(info[0])
if len(scale):
coloring["scale"] = scale
if len(provided_values_unseen_in_tree):
warn(f"The configuration JSON specifies colors for \"{key}\" which aren't found in the tree:\n\t{', '.join(provided_values_unseen_in_tree)}.")
return coloring
warn(f"The configuration JSON specifies a color scale for {key} however none of the values in the tree are in this scale! Auspice will generate its own color scale for this trait.")
elif key.lower() in provided_colors:
# `provided_colors` typically originates from a colors.tsv file
scale = []
trait_values = {str(val).lower(): val for val in get_values_across_nodes(node_attrs, key)}
trait_values_unseen = {k for k in trait_values}
Expand All @@ -223,9 +254,37 @@ def _add_color_scale(coloring):
if len(scale):
coloring["scale"] = scale
if len(trait_values_unseen):
warn("These values for trait {} were not specified in your provided color scale: {}. Auspice will create colors for them.".format(key, ", ".join(trait_values_unseen)))
else:
warn("You've specified a color scale for {} but none of the values found on the tree had associated colors. Auspice will generate its own color scale for this trait.".format(key))
warn(f"These values for trait {key} were not specified in the colors file you provided:\n\t{', '.join(trait_values_unseen)}.\n\tAuspice will create colors for them.")
return coloring
warn(f"You've supplied a colors file with information for {key} but none of the values found on the tree had associated colors. Auspice will generate its own color scale for this trait.")
# Fallthrough (no scale information provided means that auspice will generate its own scale)
return coloring

def _add_legend(coloring):
    """
    Pass a config-defined legend through to the exported coloring.

    When `config` holds a non-empty "legend" entry for this coloring's key, it
    is stored on the coloring under the key "legend"; otherwise the coloring
    is left untouched.

    Enclosing scope variables used
    ------------------------------
    config : object

    Parameters
    ----------
    coloring : object

    Returns
    -------
    object :
        the same `coloring` that was passed in, possibly modified in place
    """
    legend = config.get(coloring["key"], {}).get("legend")
    if not legend:
        return coloring
    # No reshaping or checking happens here: the legend has the same structure in the
    # auspice config file as in the exported auspice dataset JSON, and the schema
    # validates it for us. Inconsistent data (e.g. overlapping bounds) is left for
    # auspice, which will discard such entries (and print a console warning).
    coloring['legend'] = legend
    return coloring

def _add_title_and_type(coloring):
Expand Down Expand Up @@ -305,6 +364,8 @@ def _get_colorings():
colorings = [x for x in [_add_title_and_type(coloring) for coloring in colorings] if x]
# for each coloring, if colors have been provided, save them as a "scale"
colorings = [_add_color_scale(coloring) for coloring in colorings]
# for each coloring, pass-through the `legend` data from auspice_config.json, if provided
colorings = [_add_legend(coloring) for coloring in colorings]
# save them to the data json to be exported
data_json['meta']["colorings"] = colorings

Expand Down
33 changes: 33 additions & 0 deletions tests/functional/export_v2.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
Integration tests for augur export v2.

$ pushd "$TESTDIR" > /dev/null
$ export AUGUR="../../bin/augur"

Minimal export

$ ${AUGUR} export v2 \
> --tree export_v2/tree.nwk \
> --node-data export_v2/div_node-data.json \
> --output "$TMP/minimal.json" &>/dev/null
[2]

$ python3 "$TESTDIR/../../scripts/diff_jsons.py" export_v2/minimal.json "$TMP/minimal.json" \
> --exclude-paths "root['meta']['updated']"
{}

Future test:
Once augur export can read divergence values from the newick file, run it _without_ any node-data JSONs
and compare the result to the dataset exported using `div_node-data.json` - they should be identical.
See https://github.com/nextstrain/augur/issues/273 for more details.


Export with auspice config JSON which defines scale & legend settings
$ ${AUGUR} export v2 \
> --tree export_v2/tree.nwk \
> --node-data export_v2/div_node-data.json export_v2/location_node-data.json \
> --auspice-config export_v2/auspice_config1.json \
> --output "$TMP/dataset1.json" &>/dev/null

$ python3 "$TESTDIR/../../scripts/diff_jsons.py" export_v2/dataset1.json "$TMP/dataset1.json" \
> --exclude-paths "root['meta']['updated']"
{}
32 changes: 32 additions & 0 deletions tests/functional/export_v2/auspice_config1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"colorings": [
{
"key": "location",
"title": "Location",
"type": "categorical",
"legend": [
{"value": "alpha", "display": "α"},
{"value": "beta"}
],
"scale": [
["beta", "#bd0026"],
["gamma", "#6a51a3"]
]
},
{
"key": "mutation_length",
"title": "Mutations per branch",
"type": "continuous",
"legend": [
{"value": 1, "display": "0-2", "bounds": [-1,2]},
{"value": 3, "display": "3-5", "bounds": [2,5]},
{"value": 5, "display": ">5", "bounds": [5, 10]}
],
"scale": [
[1, "#081d58"],
[3, "#1d91c0"],
[5, "#c7e9b4"]
]
Comment on lines +20 to +29
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really helpful documentation of how to use these new features! I will probably refer to these tests as I learn how to use these features.

}
]
}
Loading