Skip to content

Commit 917a080

Browse files
authored
improve stat hiding filters in lineplot_and_heatmap (#182)
In `lineplot_and_heatmap`, mutations that are hidden by one slider are now shown as hidden even if they fail other sliders. Previously, they were dropped rather than hidden. Addresses this issue: dms-vep/dms-vep-pipeline-3#96
1 parent 6803e48 commit 917a080

File tree

9 files changed

+370
-624
lines changed

9 files changed

+370
-624
lines changed

CHANGELOG.rst

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ All notable changes to this project will be documented in this file.
66

77
The format is based on `Keep a Changelog <https://keepachangelog.com>`_.
88

9+
6.10
10+
----
11+
- In ``lineplot_and_heatmap``, mutations that are hidden by one slider are now shown as hidden (rather than dropped) even if they fail other sliders. Addresses `this issue <https://github.com/dms-vep/dms-vep-pipeline-3/issues/96>`_
12+
913
6.9
1014
---
1115
- Added ``rename_std`` option to ``lineplot_and_heatmap``, which fixes a quasi-bug introduced in the ``rename_stat_col`` option by the changes in version 6.8.

notebooks/real_LyCoV1404.ipynb

+63-161
Large diffs are not rendered by default.

polyclonal/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
__author__ = "`the Bloom lab <https://research.fhcrc.org/bloom/en.html>`_"
3333
__email__ = "[email protected]"
34-
__version__ = "6.9"
34+
__version__ = "6.10"
3535
__url__ = "https://github.com/jbloomlab/polyclonal"
3636

3737
from polyclonal.alphabets import AAS

polyclonal/pdb_utils.py

-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
88
"""
99

10-
1110
import collections # noqa: F401
1211
import itertools
1312
import os # noqa: F401

polyclonal/plot.py

+14-10
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
88
"""
99

10-
1110
import functools
1211
import math
1312
import operator
@@ -515,8 +514,10 @@ def lineplot_and_heatmap(
515514
indicated in the slider.
516515
addtl_slider_stats_hide_not_filter : None or list
517516
By default, `addtl_slider_stats` are filtered entirely from data set. If you just want
518-
them excluded from lineplot calculation but marked as filtered on heat map,
519-
add names of stats to this list.
517+
them excluded from lineplot but marked as hidden on heat map (eg, gray box),
518+
add names of stats to this list. Mutations that fail one of these hiding filters
519+
are always shown as hidden on the heat map rather than fully excluded, even if
520+
they fail other filters in `addtl_slider_stats`.
520521
init_floor_at_zero : bool
521522
Initial value for option to put floor of zero on value in `stat_col`.
522523
init_site_statistic : {'sum', 'mean', 'max', 'min'}
@@ -657,7 +658,6 @@ def replace_std(col):
657658
.assign(**{stat_col: lambda x: x[stat_col] * scale_stat_col})
658659
)
659660

660-
# filter `data_df` by any minimums in `slider_binding_range_kwargs`
661661
if slider_binding_range_kwargs is None:
662662
slider_binding_range_kwargs = {}
663663

@@ -723,9 +723,11 @@ def replace_std(col):
723723
# get tooltips for heatmap
724724
float_cols = [c for c in req_cols if data_df[c].dtype == float]
725725
heatmap_tooltips = [
726-
alt.Tooltip(c, type="quantitative", format=".3g")
727-
if c in float_cols
728-
else alt.Tooltip(c, type="nominal")
726+
(
727+
alt.Tooltip(c, type="quantitative", format=".3g")
728+
if c in float_cols
729+
else alt.Tooltip(c, type="nominal")
730+
)
729731
for c in req_cols
730732
if c != category_col or show_category_label
731733
]
@@ -954,11 +956,13 @@ def replace_std(col):
954956
base_chart = base_chart.transform_filter(
955957
(alt.datum[slider_stat] <= (slider + 1e-6)) # round tol
956958
| ~alt.expr.isFinite(alt.datum[slider_stat]) # do not filter null
959+
| alt.datum["_stat_hide"] # do not filter hidden sites
957960
)
958961
else:
959962
base_chart = base_chart.transform_filter(
960963
(alt.datum[slider_stat] >= (slider - 1e-6)) # round tol
961964
| ~alt.expr.isFinite(alt.datum[slider_stat]) # do not filter null
965+
| alt.datum["_stat_hide"] # do not filter hidden sites
962966
)
963967
# Remove any sites that are only wildtype and filter with site zoom brush
964968
base_chart = (
@@ -1165,9 +1169,9 @@ def replace_std(col):
11651169
heatmaps.append(heatmap_bg + heatmap_hide + heatmap + heatmap_wildtype)
11661170
heatmaps = alt.vconcat(*heatmaps, spacing=10).resolve_scale(
11671171
x="shared",
1168-
color="shared"
1169-
if heatmap_color_scheme or len(categories) == 1
1170-
else "independent",
1172+
color=(
1173+
"shared" if heatmap_color_scheme or len(categories) == 1 else "independent"
1174+
),
11711175
)
11721176

11731177
chartlist = []

polyclonal/polyclonal.py

+23-25
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
88
"""
99

10-
1110
import collections
1211
import copy # noqa: F401
1312
import inspect
@@ -387,33 +386,33 @@ class Polyclonal:
387386
barcode aa_substitutions IC50
388387
0 AA 0.085
389388
1 AC M1C 0.230
390-
2 GA M1C 0.230
391-
3 AG G2A 0.296
392-
4 AT A4K 0.128
393-
5 TA A4L 0.117
394-
6 CA M1C G2A 0.355
395-
7 CG M1C A4K 0.722
396-
8 CC G2A A4K 1.414
397-
9 TC G2A A4L 0.858
398-
10 CT M1C G2A A4K 3.237
399-
11 TG M1C G2A A4L 1.430
389+
2 AG G2A 0.296
390+
3 AT A4K 0.128
391+
4 TA A4L 0.117
392+
5 CA M1C G2A 0.355
393+
6 CG M1C A4K 0.722
394+
7 CC G2A A4K 1.414
395+
8 TC G2A A4L 0.858
396+
9 CT M1C G2A A4K 3.237
397+
10 TG M1C G2A A4L 1.430
398+
11 GA M1C 0.230
400399
401400
Or the IC90s:
402401
403402
>>> model.icXX(variants_df, x=0.9, col='IC90').round(3)
404403
barcode aa_substitutions IC90
405404
0 AA 0.464
406405
1 AC M1C 1.260
407-
2 GA M1C 1.260
408-
3 AG G2A 1.831
409-
4 AT A4K 0.976
410-
5 TA A4L 0.782
411-
6 CA M1C G2A 2.853
412-
7 CG M1C A4K 4.176
413-
8 CC G2A A4K 7.473
414-
9 TC G2A A4L 4.532
415-
10 CT M1C G2A A4K 18.717
416-
11 TG M1C G2A A4L 9.532
406+
2 AG G2A 1.831
407+
3 AT A4K 0.976
408+
4 TA A4L 0.782
409+
5 CA M1C G2A 2.853
410+
6 CG M1C A4K 4.176
411+
7 CC G2A A4K 7.473
412+
8 TC G2A A4L 4.532
413+
9 CT M1C G2A A4K 18.717
414+
10 TG M1C G2A A4L 9.532
415+
11 GA M1C 1.260
417416
418417
Or the fold change IC90s of all mutations:
419418
>>> model.mut_icXX_df(
@@ -2127,10 +2126,9 @@ def fit(
21272126
fit_kwargs["fix_hill_coefficient"] = True
21282127
fit_kwargs["fix_non_neutralized_frac"] = True
21292128
fit_kwargs["reg_activity_weight"] = fit_fixed_first_reg_activity_weight
2130-
fit_kwargs[
2131-
"log_desc"
2132-
] = "fixed Hill coefficient and non-neutralized frac" + (
2133-
f" {log_desc}" if log_desc else ""
2129+
fit_kwargs["log_desc"] = (
2130+
"fixed Hill coefficient and non-neutralized frac"
2131+
+ (f" {log_desc}" if log_desc else "")
21342132
)
21352133
self.fit(**fit_kwargs)
21362134
elif fit_site_level_first:

polyclonal/utils.py

-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
88
"""
99

10-
1110
import re
1211

1312
import pandas as pd # noqa: F401

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""Setup script for ``polyclonal``."""
22

3-
43
import re
54
import sys
65

@@ -52,6 +51,7 @@
5251
"natsort>=8.0",
5352
"numpy>=1.17",
5453
"pandas>=1.5",
54+
"pyarrow",
5555
"requests",
5656
"scipy>=1.7.1",
5757
"urllib3==1.26.15", # https://github.com/googleapis/python-bigquery/issues/1565

0 commit comments

Comments
 (0)