Skip to content

Commit

Permalink
Refactor measurements module to a package
Browse files Browse the repository at this point in the history
Moves the measurements module to its own package where the subcommands
are split out into their own modules. The subcommand subparsers are
added with the new `add_command_subparsers` function.

This is an effort to reorganize augur commands with subcommands as
packages instead of continuously adding subcommands as modules.
Starting with the measurements command since it is a new command that is
unlikely to already be used by anyone outside of Nextstrain. In the
future, we may want to consider refactoring export and validate
in the same way. Putting that off for now since reorganizing those
modules may result in breaking outside uses of their APIs.
  • Loading branch information
joverlee521 committed Jul 14, 2022
1 parent d531d48 commit dc4f58b
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 134 deletions.
19 changes: 19 additions & 0 deletions augur/measurements/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
Create JSON files suitable for visualization within the measurements panel of Auspice.
"""
from augur.argparse_ import add_command_subparsers
from augur.utils import first_line
from . import export, concat

# Subcommand modules of `augur measurements`, passed to
# `add_command_subparsers` by `register_parser` below.
SUBCOMMANDS = [export, concat]


def register_parser(parent_subparsers):
    """
    Register the ``measurements`` command parser.

    A ``measurements`` parser is added to *parent_subparsers*, a subparsers
    object (with ``dest='subcommand'``) is created on it, and that object is
    handed to ``add_command_subparsers`` together with ``SUBCOMMANDS``.

    Returns the newly created ``measurements`` parser.
    """
    measurements_parser = parent_subparsers.add_parser(
        "measurements",
        help=first_line(__doc__),
    )

    # The chosen subcommand name is stored on the namespace as `subcommand`.
    subcommand_parsers = measurements_parser.add_subparsers(dest='subcommand')
    add_command_subparsers(subcommand_parsers, SUBCOMMANDS)

    return measurements_parser
56 changes: 56 additions & 0 deletions augur/measurements/concat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""
Concatenate multiple measurements JSONs into a single JSON file
"""
import sys

from augur.utils import first_line, write_json
from augur.validate import (
measurements as read_measurements_json,
ValidateError
)


def register_parser(parent_subparsers):
    """
    Register the ``concat`` subcommand parser and its arguments.

    The parser is added to *parent_subparsers* and populated with a
    "REQUIRED" group (``--jsons``, ``--output-json``) and an
    "OPTIONAL SETTINGS" group (``--default-collection``, ``--minify-json``).

    Returns the newly created ``concat`` parser.
    """
    concat_parser = parent_subparsers.add_parser("concat", help=first_line(__doc__))

    required_group = concat_parser.add_argument_group(title="REQUIRED")
    required_group.add_argument(
        "--jsons",
        metavar="JSONs",
        nargs="+",
        type=str,
        required=True,
        help="Measurement JSON files to concatenate.",
    )
    required_group.add_argument(
        "--output-json",
        metavar="JSON",
        type=str,
        required=True,
        help="Output JSON file",
    )

    optional_group = concat_parser.add_argument_group(title="OPTIONAL SETTINGS")
    optional_group.add_argument(
        "--default-collection",
        type=str,
        help=(
            "The key of the default collection to display. "
            "If not provided, the first collection of the first JSON file will be displayed"
        ),
    )
    optional_group.add_argument(
        "--minify-json",
        action="store_true",
        help="Concatenate JSONs without indentation or line returns.",
    )

    return concat_parser


def run(args):
    """
    Concatenate the collections of several measurements JSONs into one file.

    Reads every file in ``args.jsons`` with ``read_measurements_json``,
    gathers their ``collections`` in order, optionally records
    ``args.default_collection``, and writes the result to
    ``args.output_json``. The written file is then read back through
    ``read_measurements_json``; on ``ValidateError`` an error is printed to
    stderr and the process exits with status 1.
    """
    merged_collections = []
    for json_path in args.jsons:
        loaded = read_measurements_json(json_path)
        merged_collections.extend(loaded['collections'])

    output = {'collections': merged_collections}
    if args.default_collection is not None:
        output['default_collection'] = args.default_collection

    # Only override the writer's indentation when minifying was requested.
    json_options = {}
    if args.minify_json:
        json_options["indent"] = None
    write_json(output, args.output_json, include_version=False, **json_options)

    # Re-read the file that was just written so the final output is checked.
    try:
        read_measurements_json(measurements_json=args.output_json)
    except ValidateError:
        print(
            "ERROR: Validation of output JSON failed. See detailed errors above.",
            file=sys.stderr,
        )
        sys.exit(1)
215 changes: 81 additions & 134 deletions augur/measurements.py → augur/measurements/export.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""
Create JSON files suitable for visualization within the measurements panel of Auspice.
Export a measurements JSON for a single collection
"""
import os
import pandas as pd
import sys

from .argparse_ import HideAsFalseAction
from .utils import first_line, write_json
from .validate import (
from augur.argparse_ import HideAsFalseAction
from augur.utils import first_line, write_json
from augur.validate import (
measurements as read_measurements_json,
measurements_collection_config as read_collection_config_json,
ValidateError
Expand All @@ -22,7 +22,83 @@
}


def export_measurements(args):
def register_parser(parent_subparsers):
    """
    Register the ``export`` subcommand parser and all of its arguments.

    Arguments are organised into three groups: "REQUIRED" (input TSV, column
    names, output path), "COLLECTION CONFIGURATION" (options describing how
    the collection is configured for Auspice) and "OPTIONAL SETTINGS".

    Parameters
    ----------
    parent_subparsers
        Object whose ``add_parser`` method is used to create the ``export``
        parser.

    Returns
    -------
    The newly created ``export`` parser.
    """
    parser = parent_subparsers.add_parser("export", help=first_line(__doc__))

    export_required = parser.add_argument_group(
        title="REQUIRED"
    )
    export_required.add_argument("--collection", required=True, metavar="TSV",
        help="Collection of measurements and metadata in a TSV file. " +
             "Keep in mind duplicate columns will be renamed as 'X', 'X.1', 'X.2'...'X.N'")
    # The two column options live in the REQUIRED group but fall back to defaults.
    export_required.add_argument("--strain-column", default="strain",
        help="Name of the column containing strain names. " +
             "Provided column will be renamed to `strain` so please make sure no other columns are named `strain`. " +
             "Strain names in this column should match the strain names in the corresponding Auspice dataset JSON. " +
             "(default: %(default)s)")
    export_required.add_argument("--value-column", default="value",
        help="Name of the column containing the numeric values to be plotted for the given collection. " +
             "Provided column will be renamed to `value` so please make sure no other columns are named `value`. " +
             "(default: %(default)s)")
    export_required.add_argument("--output-json", required=True, metavar="JSON", type=str,
        help="Output JSON file. " +
             "The file name must follow the Auspice sidecar file naming convention to be recognized as a sidecar file. " +
             "See Nextstrain data format docs for more details.")

    export_config = parser.add_argument_group(
        title="COLLECTION CONFIGURATION",
        description="These options control the configuration of the collection for Auspice. " +
                    "You can provide a config JSON (which includes all available options) or " +
                    "command line arguments (which are more limited). " +
                    "Command line arguments will override the values set in the config JSON."
    )
    export_config.add_argument("--collection-config", metavar="JSON",
        help="Collection configuration file for advanced configurations. ")
    export_config.add_argument("--grouping-column", nargs="+",
        help="Name of the column(s) that should be used as grouping(s) for measurements. " +
             "Note that if groupings are provided via command line args, the default group-by " +
             "field in the config JSON will be dropped.")
    export_config.add_argument("--key",
        help="A short key name of the collection for internal use within Auspice. " +
             "If not provided via config or command line option, the collection TSV filename will be used. ")
    export_config.add_argument("--title",
        help="The full title of the collection to display in the measurements panel title. " +
             f"If not provided via config or command line option, the panel's default title is {DEFAULT_ARGS['title']!r}.")
    # Fixed typo in help text: "describles" -> "describes".
    export_config.add_argument("--x-axis-label",
        help="The short label to display for the x-axis that describes the value of the measurements. " +
             "If not provided via config or command line option, the panel's default " +
             f"x-axis label is {DEFAULT_ARGS['x_axis_label']!r}.")
    export_config.add_argument("--threshold", type=float,
        help="A measurements value threshold to be displayed in the measurements panel.")
    # Fixed typo in help text: "used a filters" -> "used as filters".
    export_config.add_argument("--filters", nargs="+",
        help="The columns that are to be used as filters for measurements. " +
             "If not provided, all columns will be available as filters.")
    export_config.add_argument("--group-by", type=str,
        help="The default grouping column. If not provided, the first grouping will be used.")
    export_config.add_argument("--measurements-display", type=str, choices=["raw", "mean"],
        help="The default display of the measurements")

    # Each --show-*/--hide-* pair shares a single dest handled by
    # HideAsFalseAction (presumably False for the --hide-* spelling; confirm
    # against augur.argparse_).
    export_config.add_argument("--show-overall-mean", "--hide-overall-mean",
        dest="show_overall_mean", action=HideAsFalseAction, nargs=0,
        help="Show or hide the overall mean per group by default")
    export_config.add_argument("--show-threshold", "--hide-threshold",
        dest="show_threshold", action=HideAsFalseAction, nargs=0,
        help="Show or hide the threshold by default. This will be ignored if no threshold is provided.")

    export_optional = parser.add_argument_group(
        title="OPTIONAL SETTINGS"
    )
    export_optional.add_argument("--include-columns", nargs="+",
        help="The columns to include from the collection TSV in the measurements JSON. " +
             "Be sure to list columns that are used as groupings and/or filters. " +
             "If no columns are provided, then all columns will be included by default.")
    export_optional.add_argument("--minify-json", action="store_true",
        help="Export JSON without indentation or line returns.")

    return parser


def run(args):
# Default value to None so all columns will be read
columns_to_include = None
if args.include_columns is not None:
Expand Down Expand Up @@ -174,132 +250,3 @@ def export_measurements(args):
file=sys.stderr,
)
sys.exit(1)


def concat_measurements(args):
    """
    Concatenate the collections of several measurements JSONs into one file.

    Reads every file in ``args.jsons`` with ``read_measurements_json``,
    gathers their ``collections`` in order, optionally records
    ``args.default_collection``, and writes the result to
    ``args.output_json``. The written file is then read back through
    ``read_measurements_json``; on ``ValidateError`` an error is printed to
    stderr and the process exits with status 1.
    """
    merged_collections = []
    for json_path in args.jsons:
        loaded = read_measurements_json(json_path)
        merged_collections.extend(loaded['collections'])

    output = {'collections': merged_collections}
    if args.default_collection is not None:
        output['default_collection'] = args.default_collection

    # Only override the writer's indentation when minifying was requested.
    json_options = {}
    if args.minify_json:
        json_options["indent"] = None
    write_json(output, args.output_json, include_version=False, **json_options)

    # Re-read the file that was just written so the final output is checked.
    try:
        read_measurements_json(measurements_json=args.output_json)
    except ValidateError:
        print(
            "ERROR: Validation of output JSON failed. See detailed errors above.",
            file=sys.stderr,
        )
        sys.exit(1)


def register_parser(parent_subparsers):
    """
    Register the ``measurements`` command with its ``export`` and ``concat``
    subcommands.

    A subcommand is mandatory (``subparsers.required = True``) and its name is
    stored on the parsed namespace as ``subcommand``.

    Parameters
    ----------
    parent_subparsers
        Object whose ``add_parser`` method is used to create the
        ``measurements`` parser.

    Returns
    -------
    The newly created ``measurements`` parser.
    """
    parser = parent_subparsers.add_parser("measurements", help=first_line(__doc__))
    subparsers = parser.add_subparsers(dest='subcommand')
    subparsers.required = True

    # ---- `measurements export` ----
    export = subparsers.add_parser("export", help="Export a measurements JSON for a single collection")

    export_required = export.add_argument_group(
        title="REQUIRED"
    )
    export_required.add_argument("--collection", required=True, metavar="TSV",
        help="Collection of measurements and metadata in a TSV file. " +
             "Keep in mind duplicate columns will be renamed as 'X', 'X.1', 'X.2'...'X.N'")
    # The two column options live in the REQUIRED group but fall back to defaults.
    export_required.add_argument("--strain-column", default="strain",
        help="Name of the column containing strain names. " +
             "Provided column will be renamed to `strain` so please make sure no other columns are named `strain`. " +
             "Strain names in this column should match the strain names in the corresponding Auspice dataset JSON. " +
             "(default: %(default)s)")
    export_required.add_argument("--value-column", default="value",
        help="Name of the column containing the numeric values to be plotted for the given collection. " +
             "Provided column will be renamed to `value` so please make sure no other columns are named `value`. " +
             "(default: %(default)s)")
    export_required.add_argument("--output-json", required=True, metavar="JSON", type=str,
        help="Output JSON file. " +
             "The file name must follow the Auspice sidecar file naming convention to be recognized as a sidecar file. " +
             "See Nextstrain data format docs for more details.")

    export_config = export.add_argument_group(
        title="COLLECTION CONFIGURATION",
        description="These options control the configuration of the collection for Auspice. " +
                    "You can provide a config JSON (which includes all available options) or " +
                    "command line arguments (which are more limited). " +
                    "Command line arguments will override the values set in the config JSON."
    )
    export_config.add_argument("--collection-config", metavar="JSON",
        help="Collection configuration file for advanced configurations. ")
    export_config.add_argument("--grouping-column", nargs="+",
        help="Name of the column(s) that should be used as grouping(s) for measurements. " +
             "Note that if groupings are provided via command line args, the default group-by " +
             "field in the config JSON will be dropped.")
    export_config.add_argument("--key",
        help="A short key name of the collection for internal use within Auspice. " +
             "If not provided via config or command line option, the collection TSV filename will be used. ")
    export_config.add_argument("--title",
        help="The full title of the collection to display in the measurements panel title. " +
             f"If not provided via config or command line option, the panel's default title is {DEFAULT_ARGS['title']!r}.")
    # Fixed typo in help text: "describles" -> "describes".
    export_config.add_argument("--x-axis-label",
        help="The short label to display for the x-axis that describes the value of the measurements. " +
             "If not provided via config or command line option, the panel's default " +
             f"x-axis label is {DEFAULT_ARGS['x_axis_label']!r}.")
    export_config.add_argument("--threshold", type=float,
        help="A measurements value threshold to be displayed in the measurements panel.")
    # Fixed typo in help text: "used a filters" -> "used as filters".
    export_config.add_argument("--filters", nargs="+",
        help="The columns that are to be used as filters for measurements. " +
             "If not provided, all columns will be available as filters.")
    export_config.add_argument("--group-by", type=str,
        help="The default grouping column. If not provided, the first grouping will be used.")
    export_config.add_argument("--measurements-display", type=str, choices=["raw", "mean"],
        help="The default display of the measurements")

    # Each --show-*/--hide-* pair shares a single dest handled by
    # HideAsFalseAction (presumably False for the --hide-* spelling; confirm
    # against the argparse_ module).
    export_config.add_argument("--show-overall-mean", "--hide-overall-mean",
        dest="show_overall_mean", action=HideAsFalseAction, nargs=0,
        help="Show or hide the overall mean per group by default")
    export_config.add_argument("--show-threshold", "--hide-threshold",
        dest="show_threshold", action=HideAsFalseAction, nargs=0,
        help="Show or hide the threshold by default. This will be ignored if no threshold is provided.")

    export_optional = export.add_argument_group(
        title="OPTIONAL SETTINGS"
    )
    export_optional.add_argument("--include-columns", nargs="+",
        help="The columns to include from the collection TSV in the measurements JSON. " +
             "Be sure to list columns that are used as groupings and/or filters. " +
             "If no columns are provided, then all columns will be included by default.")
    export_optional.add_argument("--minify-json", action="store_true",
        help="Export JSON without indentation or line returns.")

    # ---- `measurements concat` ----
    concat = subparsers.add_parser("concat", help="Concatenate multiple measurements JSONs into a single JSON file")
    concat_required = concat.add_argument_group(
        title="REQUIRED"
    )
    concat_required.add_argument("--jsons", required=True, type=str, nargs="+", metavar="JSONs",
        help="Measurement JSON files to concatenate.")
    concat_required.add_argument("--output-json", required=True, metavar="JSON", type=str,
        help="Output JSON file")

    concat_optional = concat.add_argument_group(
        title="OPTIONAL SETTINGS"
    )
    concat_optional.add_argument("--default-collection", type=str,
        help="The key of the default collection to display. " +
             "If not provided, the first collection of the first JSON file will be displayed")
    concat_optional.add_argument("--minify-json", action="store_true",
        help="Concatenate JSONs without indentation or line returns.")
    return parser



def run(args):
    """
    Dispatch to the handler for the selected ``measurements`` subcommand.

    Returns whatever the chosen handler returns, or ``None`` when
    ``args.subcommand`` is neither ``export`` nor ``concat``.
    """
    handlers = {
        'export': export_measurements,
        "concat": concat_measurements,
    }
    handler = handlers.get(args.subcommand)
    if handler is None:
        return None
    return handler(args)

0 comments on commit dc4f58b

Please sign in to comment.