Skip to content

Commit 348613b

Browse files
committed
Clarify behavior for options that use strain ID
Add reference to --metadata-id-columns, which affects the behavior of these options.
1 parent 14302ac commit 348613b

File tree

2 files changed

+28
-20
lines changed

2 files changed

+28
-20
lines changed

Diff for: augur/filter/__init__.py

+10 -6
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ def register_arguments(parser):
6666
"201X-10-01"). Similarly, an ambiguous month makes the
6767
corresponding day ambiguous (e.g., "2010-XX-01").""")
6868
metadata_filter_group.add_argument('--exclude', type=str, nargs="+", metavar="FILE", default=argparse.SUPPRESS,
69-
help="File(s) with list of strains to exclude.")
69+
help="""File(s) with list of strain IDs to exclude. The ID column is
70+
determined by --metadata-id-columns.""")
7071
metadata_filter_group.add_argument('--exclude-where', nargs='+', metavar="CONDITION", default=argparse.SUPPRESS,
7172
help="""Exclude strains matching these conditions. Ex: \"host=rat\" or
7273
\"host!=rat\". Multiple values are processed as OR (matching any of
@@ -75,8 +76,9 @@ def register_arguments(parser):
7576
help="""Exclude all strains by default. Use this with the include
7677
arguments to select a specific subset of strains.""")
7778
metadata_filter_group.add_argument('--include', type=str, nargs="+", metavar="FILE", default=argparse.SUPPRESS,
78-
help="""File(s) with list of strains to include regardless of
79-
priorities, subsampling, or absence of an entry in --sequences.""")
79+
help="""File(s) with list of strain IDs to include regardless of
80+
priorities, subsampling, or absence of an entry in --sequences. The
81+
ID column is determined by --metadata-id-columns.""")
8082
metadata_filter_group.add_argument('--include-where', nargs='+', metavar="CONDITION", default=argparse.SUPPRESS,
8183
help="""Include strains with these values. ex: host=rat. Multiple values
8284
are processed as OR (having any of those specified will be
@@ -128,13 +130,14 @@ def register_arguments(parser):
128130

129131
subsample_group.add_argument('--priority', type=str, metavar="FILE", default=argparse.SUPPRESS,
130132
help="""Tab-delimited file with list of priority scores for strains
131-
(e.g., "<strain>\\t<priority>") and no header. When scores are
133+
(e.g., "<strain ID>\\t<priority>") and no header. When scores are
132134
provided, Augur converts scores to floating point values, sorts
133135
strains within each subsampling group from highest to lowest
134136
priority, and selects the top N strains per group where N is the
135137
calculated or requested number of strains per group. Higher numbers
136138
indicate higher priority. Since priorities represent relative
137-
values between strains, these values can be arbitrary.""")
139+
values between strains, these values can be arbitrary. The ID
140+
column is determined by --metadata-id-columns.""")
138141
subsample_group.add_argument('--subsample-seed', type=int, metavar="N", default=argparse.SUPPRESS,
139142
help="""Random number generator seed to allow reproducible subsampling
140143
(with same input data).""")
@@ -149,7 +152,8 @@ def register_arguments(parser):
149152
output_group.add_argument('--output-metadata', metavar="FILE", default=argparse.SUPPRESS,
150153
help="Metadata for strains that passed filters.")
151154
output_group.add_argument('--output-strains', metavar="FILE", default=argparse.SUPPRESS,
152-
help="List of strains that passed filters (no header).")
155+
help="""List of strain IDs that passed filters (no header). The ID
156+
column is determined by --metadata-id-columns.""")
153157
output_group.add_argument('--output-log', metavar="FILE", default=argparse.SUPPRESS,
154158
help="""Tab-delimited file with one row for each filtered strain and the
155159
reason it was filtered. Keyword arguments used for a given filter

Diff for: tests/functional/filter/cram/filter-help.t

+18 -14
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ Show help text
9292
"201X-10-01"). Similarly, an ambiguous month makes the
9393
corresponding day ambiguous (e.g., "2010-XX-01").
9494
--exclude FILE [FILE ...]
95-
File(s) with list of strains to exclude.
95+
File(s) with list of strain IDs to exclude. The ID
96+
column is determined by --metadata-id-columns.
9697
--exclude-where CONDITION [CONDITION ...]
9798
Exclude strains matching these conditions. Ex:
9899
"host=rat" or "host!=rat". Multiple values are
@@ -102,9 +103,10 @@ Show help text
102103
include arguments to select a specific subset of
103104
strains.
104105
--include FILE [FILE ...]
105-
File(s) with list of strains to include regardless of
106-
priorities, subsampling, or absence of an entry in
107-
--sequences.
106+
File(s) with list of strain IDs to include regardless
107+
of priorities, subsampling, or absence of an entry in
108+
--sequences. The ID column is determined by
109+
--metadata-id-columns.
108110
--include-where CONDITION [CONDITION ...]
109111
Include strains with these values. ex: host=rat.
110112
Multiple values are processed as OR (having any of
@@ -151,15 +153,16 @@ Show help text
151153
max-sequences` is provided. (default: True)
152154
--no-probabilistic-sampling
153155
--priority FILE Tab-delimited file with list of priority scores for
154-
strains (e.g., "<strain>\t<priority>") and no header.
155-
When scores are provided, Augur converts scores to
156-
floating point values, sorts strains within each
157-
subsampling group from highest to lowest priority, and
158-
selects the top N strains per group where N is the
159-
calculated or requested number of strains per group.
160-
Higher numbers indicate higher priority. Since
161-
priorities represent relative values between strains,
162-
these values can be arbitrary.
156+
strains (e.g., "<strain ID>\t<priority>") and no
157+
header. When scores are provided, Augur converts
158+
scores to floating point values, sorts strains within
159+
each subsampling group from highest to lowest
160+
priority, and selects the top N strains per group
161+
where N is the calculated or requested number of
162+
strains per group. Higher numbers indicate higher
163+
priority. Since priorities represent relative values
164+
between strains, these values can be arbitrary. The ID
165+
column is determined by --metadata-id-columns.
163166
--subsample-seed N Random number generator seed to allow reproducible
164167
subsampling (with same input data).
165168

@@ -173,7 +176,8 @@ Show help text
173176
--output-metadata FILE
174177
Metadata for strains that passed filters.
175178
--output-strains FILE
176-
List of strains that passed filters (no header).
179+
List of strain IDs that passed filters (no header).
180+
The ID column is determined by --metadata-id-columns.
177181
--output-log FILE Tab-delimited file with one row for each filtered
178182
strain and the reason it was filtered. Keyword
179183
arguments used for a given filter are reported in JSON

0 commit comments

Comments
 (0)