-
Notifications
You must be signed in to change notification settings - Fork 128
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
… use in `frequencies`
- Loading branch information
Showing
9 changed files
with
409 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import argparse | ||
import datetime | ||
from textwrap import dedent | ||
import isodate | ||
import treetime.utils | ||
|
||
SUPPORTED_DATE_HELP_TEXT = dedent("""\ | ||
1. an Augur-style numeric date with the year as the integer part (e.g. 2020.42) or | ||
2. a date in ISO 8601 date format (i.e. YYYY-MM-DD) (e.g. '2020-06-04') or | ||
3. a backwards-looking relative date in ISO 8601 duration format with optional P prefix (e.g. '1W', 'P1W') | ||
""") | ||
|
||
def numeric_date(date): | ||
""" | ||
Converts the given *date* string to a :py:class:`float`. | ||
*date* may be given as: | ||
1. A string or float (number) with year as the integer part | ||
2. A string in the YYYY-MM-DD (ISO 8601) syntax | ||
3. A string representing a relative date (duration before datetime.date.today()) | ||
>>> numeric_date("2020.42") | ||
2020.42 | ||
>>> numeric_date("2020-06-04") | ||
2020.42486... | ||
>>> import datetime, isodate, treetime | ||
>>> numeric_date("1W") == treetime.utils.numeric_date(datetime.date.today() - isodate.parse_duration("P1W")) | ||
True | ||
""" | ||
# date is numeric | ||
try: | ||
return float(date) | ||
except ValueError: | ||
pass | ||
|
||
# date is in YYYY-MM-DD form | ||
try: | ||
return treetime.utils.numeric_date(datetime.date(*map(int, date.split("-", 2)))) | ||
except ValueError: | ||
pass | ||
|
||
# date is a duration treated as a backwards-looking relative date | ||
try: | ||
# make a copy of date for this block | ||
duration_str = str(date) | ||
if duration_str.startswith('P'): | ||
duration_str = duration_str | ||
else: | ||
duration_str = 'P'+duration_str | ||
return treetime.utils.numeric_date(datetime.date.today() - isodate.parse_duration(duration_str)) | ||
except (ValueError, isodate.ISO8601Error): | ||
pass | ||
|
||
raise ValueError(f"""Unable to determine date from '{date}'. Ensure it is in one of the supported formats:\n{SUPPORTED_DATE_HELP_TEXT}""") | ||
|
||
def numeric_date_type(date): | ||
"""Wraps numeric_date() for argparse usage. | ||
This raises an ArgumentTypeError, otherwise the custom exception message won't be shown in console output due to: | ||
https://github.com/python/cpython/blob/5c4d1f6e0e192653560ae2941a6677fbf4fbd1f2/Lib/argparse.py#L2503-L2513 | ||
""" | ||
try: | ||
return numeric_date(date) | ||
except ValueError as e: | ||
raise argparse.ArgumentTypeError(str(e)) from e |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
Integration tests for augur frequencies. | ||
|
||
$ pushd "$TESTDIR" > /dev/null | ||
$ export AUGUR="../../bin/augur" | ||
|
||
Calculate KDE-based tip frequencies from a refined tree. | ||
Timepoints used to estimate frequencies (i.e., "pivots") get calculated from the range of dates in the given metadata. | ||
|
||
$ ${AUGUR} frequencies \ | ||
> --method kde \ | ||
> --tree "frequencies/tree.nwk" \ | ||
> --metadata "frequencies/metadata.tsv" \ | ||
> --pivot-interval 3 \ | ||
> --output "$TMP/tip-frequencies.json" > /dev/null | ||
|
||
$ diff -u --ignore-matching-lines version "frequencies/zika_tip-frequencies.json" "$TMP/tip-frequencies.json" | ||
$ rm -f "$TMP/tip-frequencies.json" | ||
|
||
Calculate KDE-based tip frequencies for a time period with fixed dates. | ||
Pivots get calculated from the requested date range. | ||
|
||
$ ${AUGUR} frequencies \ | ||
> --method kde \ | ||
> --tree "frequencies/tree.nwk" \ | ||
> --metadata "frequencies/metadata.tsv" \ | ||
> --pivot-interval 3 \ | ||
> --min-date 2015-01-01 \ | ||
> --max-date 2016-01-01 \ | ||
> --output "$TMP/tip-frequencies.json" > /dev/null | ||
|
||
$ diff -u --ignore-matching-lines version "frequencies/zika_tip-frequencies_with_fixed_dates.json" "$TMP/tip-frequencies.json" | ||
$ rm -f "$TMP/tip-frequencies.json" | ||
|
||
Calculate KDE-based tip frequencies for a time period with relative dates. | ||
Testing relative dates deterministically from the shell is tricky. | ||
To keep these tests simple and avoid freezing the system date to specific values, this test checks the logical consistency of the requested relative dates and pivot interval. | ||
With a minimum date of 1 year (12 months) ago, a maximum date of 6 months ago, and a pivot interval of 3 months, we expect only 2 pivots in the final output. | ||
Since the test data are much older than the time period requested, all strains will always have frequencies of 0 for the 2 requested pivots. | ||
As long as we always calculate 2 pivots with frequencies of 0 for all strains, we can ignore the actual pivot values calculated for the relative dates in the diff below. | ||
|
||
$ ${AUGUR} frequencies \ | ||
> --method kde \ | ||
> --tree "frequencies/tree.nwk" \ | ||
> --metadata "frequencies/metadata.tsv" \ | ||
> --pivot-interval 3 \ | ||
> --min-date 1Y \ | ||
> --max-date 6M \ | ||
> --output "$TMP/tip-frequencies.json" > /dev/null | ||
|
||
$ python3 "$TESTDIR/../../scripts/diff_jsons.py" \ | ||
> --exclude-paths "root['generated_by']['version']" "root['pivots']" -- \ | ||
> "frequencies/zika_tip-frequencies_with_relative_dates.json" \ | ||
> "$TMP/tip-frequencies.json" | ||
{} | ||
$ rm -f "$TMP/tip-frequencies.json" | ||
|
||
$ popd > /dev/null |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
strain virus accession date region country division city db segment authors url title journal paper_url | ||
PAN/CDC_259359_V1_V3/2015 zika KX156774 2015-12-18 North America Panama Panama Panama genbank genome Shabman et al https://www.ncbi.nlm.nih.gov/nuccore/KX156774 Direct Submission Submitted (29-APR-2016) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/ | ||
COL/FLR_00024/2015 zika MF574569 2015-12-XX South America Colombia Colombia Colombia genbank genome Pickett et al https://www.ncbi.nlm.nih.gov/nuccore/MF574569 Direct Submission Submitted (28-JUL-2017) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/ | ||
PRVABC59 zika KU501215 2015-12-XX North America Puerto Rico Puerto Rico Puerto Rico genbank genome Lanciotti et al https://www.ncbi.nlm.nih.gov/nuccore/KU501215 Phylogeny of Zika Virus in Western Hemisphere, 2015 Emerging Infect. Dis. 22 (5), 933-935 (2016) https://www.ncbi.nlm.nih.gov/pubmed/27088323 | ||
COL/FLR_00008/2015 zika MF574562 2015-12-XX South America Colombia Colombia Colombia genbank genome Pickett et al https://www.ncbi.nlm.nih.gov/nuccore/MF574562 Direct Submission Submitted (28-JUL-2017) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/ | ||
Colombia/2016/ZC204Se zika KY317939 2016-01-06 South America Colombia Colombia Colombia genbank genome Quick et al https://www.ncbi.nlm.nih.gov/nuccore/KY317939 Multiplex PCR method for MinION and Illumina sequencing of Zika and other virus genomes directly from clinical samples Nat Protoc 12 (6), 1261-1276 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538739 | ||
ZKC2/2016 zika KX253996 2016-02-16 Oceania American Samoa American Samoa American Samoa genbank genome Wu et al https://www.ncbi.nlm.nih.gov/nuccore/KX253996 Direct Submission Submitted (18-MAY-2016) Center for Diseases Control and Prevention of Guangdong Province; National Institute of Viral Disease Control and Prevention, China https://www.ncbi.nlm.nih.gov/pubmed/ | ||
VEN/UF_1/2016 zika KX702400 2016-03-25 South America Venezuela Venezuela Venezuela genbank genome Blohm et al https://www.ncbi.nlm.nih.gov/nuccore/KX702400 Complete Genome Sequences of Identical Zika virus Isolates in a Nursing Mother and Her Infant Genome Announc 5 (17), e00231-17 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28450510 | ||
DOM/2016/BB_0059 zika KY785425 2016-04-04 North America Dominican Republic Dominican Republic Dominican Republic genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785425 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 | ||
BRA/2016/FC_6706 zika KY785433 2016-04-08 South America Brazil Brazil Brazil genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785433 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 | ||
DOM/2016/BB_0183 zika KY785420 2016-04-18 North America Dominican Republic Dominican Republic Dominican Republic genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785420 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 | ||
EcEs062_16 zika KX879603 2016-04-XX South America Ecuador Ecuador Ecuador genbank genome Marquez et al https://www.ncbi.nlm.nih.gov/nuccore/KX879603 First Complete Genome Sequences of Zika Virus Isolated from Febrile Patient Sera in Ecuador Genome Announc 5 (8), e01673-16 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28232448 | ||
HND/2016/HU_ME59 zika KY785418 2016-05-13 North America Honduras Honduras Honduras genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785418 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
((Colombia/2016/ZC204Se:0.00105368,(PAN/CDC_259359_V1_V3/2015:0.00076051,(COL/FLR_00008/2015:0.00044440,VEN/UF_1/2016:0.00089377)NODE_0000008:0.00038502)NODE_0000007:0.00019253)NODE_0000001:0.00080159,(BRA/2016/FC_6706:0.00214920,(ZKC2/2016:0.00173693,(HND/2016/HU_ME59:0.00206150,PRVABC59:0.00135309)NODE_0000004:0.00013537,(EcEs062_16:0.00175918,DOM/2016/BB_0183:0.00184905)NODE_0000002:0.00021565)NODE_0000003:0.00013737)NODE_0000005:0.00019772)NODE_0000006:0.00100000; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
{ | ||
"BRA/2016/FC_6706": { | ||
"frequencies": [ | ||
0.022579, | ||
0.011312, | ||
0.206983, | ||
0.102282 | ||
] | ||
}, | ||
"COL/FLR_00008/2015": { | ||
"frequencies": [ | ||
0.243453, | ||
0.208443, | ||
0.008645, | ||
0.010659 | ||
] | ||
}, | ||
"Colombia/2016/ZC204Se": { | ||
"frequencies": [ | ||
0.126056, | ||
0.231597, | ||
0.014167, | ||
0.017099 | ||
] | ||
}, | ||
"DOM/2016/BB_0183": { | ||
"frequencies": [ | ||
0.017888, | ||
0.009342, | ||
0.183671, | ||
0.148759 | ||
] | ||
}, | ||
"EcEs062_16": { | ||
"frequencies": [ | ||
0.018981, | ||
0.009763, | ||
0.190994, | ||
0.134398 | ||
] | ||
}, | ||
"HND/2016/HU_ME59": { | ||
"frequencies": [ | ||
0.009484, | ||
0.006254, | ||
0.090552, | ||
0.457824 | ||
] | ||
}, | ||
"PAN/CDC_259359_V1_V3/2015": { | ||
"frequencies": [ | ||
0.224832, | ||
0.214575, | ||
0.008963, | ||
0.011176 | ||
] | ||
}, | ||
"PRVABC59": { | ||
"frequencies": [ | ||
0.243453, | ||
0.208443, | ||
0.008645, | ||
0.010659 | ||
] | ||
}, | ||
"VEN/UF_1/2016": { | ||
"frequencies": [ | ||
0.030662, | ||
0.016483, | ||
0.206983, | ||
0.070196 | ||
] | ||
}, | ||
"ZKC2/2016": { | ||
"frequencies": [ | ||
0.062612, | ||
0.083787, | ||
0.080395, | ||
0.036947 | ||
] | ||
}, | ||
"generated_by": { | ||
"program": "augur", | ||
"version": "7.0.2" | ||
}, | ||
"pivots": [ | ||
2015.75, | ||
2016.0, | ||
2016.25, | ||
2016.5 | ||
] | ||
} |
Oops, something went wrong.