diff --git a/CHANGES.md b/CHANGES.md index c4b3683d3..a627b917b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,11 @@ ## __NEXT__ +### Features + +* titers: Support parsing of thresholded values (e.g., "<80" or ">2560"). [#1118][] (@huddlej) + +[#1118]: https://github.com/nextstrain/augur/pull/1118 ## 19.2.0 (19 December 2022) diff --git a/augur/titer_model.py b/augur/titer_model.py index b7aa60358..8b60848da 100644 --- a/augur/titer_model.py +++ b/augur/titer_model.py @@ -41,22 +41,51 @@ def load_from_file(filenames, excluded_sources=None): >>> len(measurements) 11 - >>> measurements[("A/Acores/11/2013", ("A/Alabama/5/2010", "F27/10"))] - [80.0] >>> len(strains) 13 >>> len(sources) 5 + + Inspect specific measurements. First, inspect a measurement that has a + specific value in the input. + + >>> measurements[("A/Acores/11/2013", ("A/Alabama/5/2010", "F27/10"))] + [80.0] + + Next, inspect a measurement that has a thresholded value at the lower + bound of detection (e.g., "<80"). This measurement should be reported as + one half of its threshold value (e.g., 40.0). + + >>> measurements[("A/Acores/11/2013", ("A/Victoria/208/2009", "F7/10"))] + [40.0] + + Inspect a measurement that has a thresholded value at the upper bound of + detection (">1280"). This measurement should be reported as twice its + threshold value (e.g., 2560.0). + + >>> measurements[("A/Acores/SU43/2012", ("A/Texas/50/2012", "F36/12"))] + [2560.0] + + Confirm that excluding sources produces fewer measurements. + >>> measurements, strains, sources = TiterCollection.load_from_file("tests/data/titer_model/h3n2_titers_subset.tsv", excluded_sources=["NIMR_Sep2013_7-11.csv"]) >>> len(measurements) 5 + + Request measurements for a test/reference/serum tuple that should not + exist after excluding its source. + >>> measurements.get(("A/Acores/11/2013", ("A/Alabama/5/2010", "F27/10"))) >>> + + Missing titer data should produce an error. + >>> output = TiterCollection.load_from_file("tests/data/titer_model/missing.tsv") Traceback (most recent call last): File "", line 1, in open("tests/data/titer_model/missing.tsv", "r") FileNotFoundError: [Errno 2] No such file or directory: 'tests/data/titer_model/missing.tsv' + """ if excluded_sources is None: excluded_sources = [] @@ -70,10 +99,21 @@ def load_from_file(filenames, excluded_sources=None): with open_file(fname, 'r') as infile: for line in infile: entries = line.strip().split('\t') + titer = entries[4] try: - val = float(entries[4]) - except: + # Convert values below or above the measurement + # threshold (e.g., "<80" or ">2560") to half or twice + # their thresholded value, respectively, so they can be + # included in models instead of being discarded. + if titer.startswith("<"): + val = float(titer[1:]) / 2 + elif titer.startswith(">"): + val = float(titer[1:]) * 2 + else: + val = float(titer) + except ValueError: continue + test, ref_virus, serum, src_id = (entries[0], entries[1],entries[2], entries[3]) diff --git a/tests/data/titer_model/h3n2_titers_subset.tsv b/tests/data/titer_model/h3n2_titers_subset.tsv index 5515bb526..4991001fb 100644 --- a/tests/data/titer_model/h3n2_titers_subset.tsv +++ b/tests/data/titer_model/h3n2_titers_subset.tsv @@ -1,12 +1,12 @@ A/Acores/11/2013 A/Alabama/5/2010 F27/10 NIMR_Sep2013_7-11.csv 80 hi A/Acores/11/2013 A/Athens/112/2012 F16/12 NIMR_Sep2013_7-11.csv 640 hi A/Acores/11/2013 A/Berlin/93/2011 T/CF11/12 NIMR_Sep2013_7-11.csv 640 hi -A/Acores/11/2013 A/Victoria/208/2009 F7/10 NIMR_Sep2013_7-11.csv 80 hi +A/Acores/11/2013 A/Victoria/208/2009 F7/10 NIMR_Sep2013_7-11.csv <80 hi A/Acores/11/2013 A/Stockholm/18/2011 F28/11 NIMR_Sep2013_7-11.csv 160 hi A/Acores/SU43/2012 A/Alabama/5/2010 F27/10 NIMR_Feb2013_18.csv 320 hi A/Acores/SU43/2012 A/Hawaii/22/2012 F37/12 NIMR_Feb2013_18.csv 320 hi A/Acores/11/2013 A/Perth/16/2009 F35/11 NIMR_Sep2013_7-11.csv 40 hi -A/Acores/SU43/2012 A/Texas/50/2012 F36/12 NIMR_Feb2013_18.csv 1280 hi +A/Acores/SU43/2012 A/Texas/50/2012 F36/12 NIMR_Feb2013_18.csv >1280 hi A/Adana/116/2014 A/Iowa/19/2010 F15/11 NIMR_Feb2014_9-09.csv 80 hi A/Cairo/63/2012 A/Texas/50/2012 F36/12 NIMR_Feb2013_16.csv 1280 hi A/Cairo/63/2012 A/Texas/50/2012 F36/12 NIMR_Sep2013_7-04.csv 640 hi diff --git a/tests/functional/titers/cram/titers-sub-with-tree-and-custom-prefix.t b/tests/functional/titers/cram/titers-sub-with-tree-and-custom-prefix.t index efc9de179..5431b399d 100644 --- a/tests/functional/titers/cram/titers-sub-with-tree-and-custom-prefix.t +++ b/tests/functional/titers/cram/titers-sub-with-tree-and-custom-prefix.t @@ -13,8 +13,8 @@ Test titer substitution model with alignment and tree inputs and a custom prefix > --attribute-prefix custom_prefix_ \ > --output $TMP/titers-sub.json > /dev/null Read titers from ../data/titers.tsv, found: - --- 61 strains + --- 62 strains --- 15 data sources - --- 232 total measurements + --- 272 total measurements $ grep custom_prefix_cTiterSub $TMP/titers-sub.json | wc -l \s*120 (re) diff --git a/tests/functional/titers/cram/titers-sub-with-tree.t b/tests/functional/titers/cram/titers-sub-with-tree.t index d2c68589a..a8804e441 100644 --- a/tests/functional/titers/cram/titers-sub-with-tree.t +++ b/tests/functional/titers/cram/titers-sub-with-tree.t @@ -12,8 +12,8 @@ Test titer substitution model with alignment and tree inputs. > --gene-names HA1 \ > --output $TMP/titers-sub.json > /dev/null Read titers from ../data/titers.tsv, found: - --- 61 strains + --- 62 strains --- 15 data sources - --- 232 total measurements + --- 272 total measurements $ grep cTiterSub $TMP/titers-sub.json | wc -l \s*120 (re) diff --git a/tests/functional/titers/cram/titers-tree-with-custom-prefix.t b/tests/functional/titers/cram/titers-tree-with-custom-prefix.t index 5fa165548..1282f7b35 100644 --- a/tests/functional/titers/cram/titers-tree-with-custom-prefix.t +++ b/tests/functional/titers/cram/titers-tree-with-custom-prefix.t @@ -11,8 +11,8 @@ Test titer tree model with a custom prefix for the node data attributes in the o > --attribute-prefix custom_prefix_ \ > --output $TMP/titers-tree.json > /dev/null Read titers from ../data/titers.tsv, found: - --- 61 strains + --- 62 strains --- 15 data sources - --- 232 total measurements + --- 272 total measurements $ grep custom_prefix_cTiter $TMP/titers-tree.json | wc -l \s*120 (re) diff --git a/tests/functional/titers/cram/titers-tree.t b/tests/functional/titers/cram/titers-tree.t index 805eb728e..2d0b66156 100644 --- a/tests/functional/titers/cram/titers-tree.t +++ b/tests/functional/titers/cram/titers-tree.t @@ -10,8 +10,8 @@ Test titer tree model. > --titers ../data/titers.tsv \ > --output $TMP/titers-tree.json > /dev/null Read titers from ../data/titers.tsv, found: - --- 61 strains + --- 62 strains --- 15 data sources - --- 232 total measurements + --- 272 total measurements $ grep cTiter $TMP/titers-tree.json | wc -l \s*120 (re)