Skip to content

Commit

Permalink
Merge pull request #1041 from YosefLab/DE_string_queries
Browse files Browse the repository at this point in the history
De string queries
  • Loading branch information
adamgayoso authored Apr 27, 2021
2 parents 19b9c4f + 26c04ee commit 75eeb81
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 24 deletions.
6 changes: 3 additions & 3 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.intersphinx",
"sphinx.ext.viewcode",
"nbsphinx",
"nbsphinx_link",
"sphinx.ext.mathjax",
"sphinx.ext.napoleon",
"sphinx_autodoc_typehints", # needs to be after napoleon
"sphinx.ext.intersphinx",
"sphinx.ext.autosummary",
"scanpydoc.elegant_typehints",
"scanpydoc.definition_list_typed_field",
Expand Down Expand Up @@ -80,8 +80,8 @@
anndata=("https://anndata.readthedocs.io/en/stable/", None),
ipython=("https://ipython.readthedocs.io/en/stable/", None),
matplotlib=("https://matplotlib.org/", None),
numpy=("https://docs.scipy.org/doc/numpy/", None),
pandas=("https://pandas.pydata.org/pandas-docs/stable/", None),
numpy=("https://numpy.org/doc/stable/", None),
pandas=("https://pandas.pydata.org/docs/", None),
python=("https://docs.python.org/3", None),
scipy=("https://docs.scipy.org/doc/scipy/reference/", None),
sklearn=("https://scikit-learn.org/stable/", None),
Expand Down
27 changes: 27 additions & 0 deletions docs/release_notes/v0.11.0.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
New in 0.11.0 (2021-04-28)
--------------------------

Changes
~~~~~~~
- Includes new optional variance parameterization for the `Encoder` module (`#1037`_).
- Provides a new way to select subpopulations for DE using Pandas queries (`#1041`_).
- Optional pseudocounts and automatic effect-size threshold for DE in the change mode (`#1043`_).


Contributors
~~~~~~~~~~~~
- `@adamgayoso`_
- `@romain-lopez`_
- `@PierreBoyeau`_


.. _`@adamgayoso`: https://github.com/adamgayoso
.. _`@romain-lopez`: https://github.com/romain-lopez
.. _`@PierreBoyeau`: https://github.com/PierreBoyeau


.. _`#1037`: https://github.com/YosefLab/scvi-tools/pull/1037
.. _`#1041`: https://github.com/YosefLab/scvi-tools/pull/1041
.. _`#1043`: https://github.com/YosefLab/scvi-tools/pull/1043


11 changes: 8 additions & 3 deletions scvi/_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,16 @@
If `None`, compare each group in `group1` to the union of the rest of the groups
in `groupby`. If a group identifier, compare with respect to this group.
idx1
Boolean mask or indices for `group1`. `idx1` and `idx2` can be used as an alternative
to the AnnData keys. If `idx1` is not `None`, this option overrides `group1`
`idx1` and `idx2` can be used as an alternative to the AnnData keys.
Custom identifier for `group1` that can be of three sorts: (1) a boolean mask,
(2) indices, or (3) a string. If it is a string, then it will query indices that
verify conditions on `adata.obs`, as described in :meth:`pandas.DataFrame.query`.
If `idx1` is not `None`, this option overrides `group1`
and `group2`.
idx2
Boolean mask or indices for `group2`. By default, includes all cells not specified in
Custom identifier for `group2` that has the same
properties as `idx1`.
By default, includes all cells not specified in
`idx1`.
mode
Method for differential expression. See user guide for full explanation.
Expand Down
4 changes: 2 additions & 2 deletions scvi/model/_peakvi.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,8 +359,8 @@ def differential_accessibility(
groupby: Optional[str] = None,
group1: Optional[Iterable[str]] = None,
group2: Optional[str] = None,
idx1: Optional[Union[Sequence[int], Sequence[bool]]] = None,
idx2: Optional[Union[Sequence[int], Sequence[bool]]] = None,
idx1: Optional[Union[Sequence[int], Sequence[bool], str]] = None,
idx2: Optional[Union[Sequence[int], Sequence[bool], str]] = None,
mode: Literal["vanilla", "change"] = "change",
delta: float = 0.05,
batch_size: Optional[int] = None,
Expand Down
4 changes: 2 additions & 2 deletions scvi/model/_totalvi.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,8 +654,8 @@ def differential_expression(
groupby: Optional[str] = None,
group1: Optional[Iterable[str]] = None,
group2: Optional[str] = None,
idx1: Optional[Union[Sequence[int], Sequence[bool]]] = None,
idx2: Optional[Union[Sequence[int], Sequence[bool]]] = None,
idx1: Optional[Union[Sequence[int], Sequence[bool], str]] = None,
idx2: Optional[Union[Sequence[int], Sequence[bool], str]] = None,
mode: Literal["vanilla", "change"] = "change",
delta: float = 0.25,
batch_size: Optional[int] = None,
Expand Down
2 changes: 1 addition & 1 deletion scvi/model/base/_base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def save(
save_anndata
If True, also saves the anndata
anndata_write_kwargs
Kwargs for :func:`~anndata.AnnData.write`
Kwargs for :meth:`~anndata.AnnData.write`
"""
# get all the user attributes
user_attributes = self._get_user_attributes()
Expand Down
4 changes: 2 additions & 2 deletions scvi/model/base/_rnamixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ def differential_expression(
groupby: Optional[str] = None,
group1: Optional[Iterable[str]] = None,
group2: Optional[str] = None,
idx1: Optional[Union[Sequence[int], Sequence[bool]]] = None,
idx2: Optional[Union[Sequence[int], Sequence[bool]]] = None,
idx1: Optional[Union[Sequence[int], Sequence[bool], str]] = None,
idx2: Optional[Union[Sequence[int], Sequence[bool], str]] = None,
mode: Literal["vanilla", "change"] = "change",
delta: float = 0.25,
batch_size: Optional[int] = None,
Expand Down
62 changes: 51 additions & 11 deletions scvi/model/base/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
import pickle
import warnings
from collections.abc import Iterable as IterableClass
from typing import Optional
from typing import List, Optional, Union

import numpy as np
import pandas as pd
import torch
from anndata import read
from anndata import AnnData, read

from scvi._compat import Literal
from scvi.utils import DifferentialComputation, track
Expand Down Expand Up @@ -86,6 +86,54 @@ def _validate_var_names(adata, source_var_names):
)


def _prepare_obs(
    idx1: Union[List[bool], np.ndarray, str],
    idx2: Optional[Union[List[bool], np.ndarray, str]],
    adata: AnnData,
):
    """
    Construct an array used for masking.

    Given population identifiers `idx1` and potentially `idx2`,
    this function creates an array `obs_col` that identifies both populations
    for observations contained in `adata`.
    In particular, `obs_col` will take values `"one"` (resp. `"two"`)
    for `idx1` (resp. `idx2`), and `"None"` everywhere else. If the two
    selections overlap, the shared observations are labelled `"two"`.

    Parameters
    ----------
    idx1
        Can be of three types. First, it can correspond to a boolean mask with
        one entry per observation of `adata`. It can also correspond to a list
        of indices. Last, it can correspond to a string query on the columns
        of `adata.obs`, as described in :meth:`pandas.DataFrame.query`.
    idx2
        Same as above, or `None` if no second population is specified.
    adata
        AnnData object whose `obs` and `shape` are used.

    Returns
    -------
    A tuple `(obs_col, group1, group2)` where `obs_col` is the array of labels,
    `group1` is `["one"]` and `group2` is `"two"` (`None` if `idx2` is `None`).

    Raises
    ------
    ValueError
        If `idx1` or `idx2` selects no observation, or if a string query does
        not evaluate to a boolean mask.
    """

    def ravel_idx(my_idx, obs_df):
        if not isinstance(my_idx, str):
            return np.asarray(my_idx).ravel()
        # Evaluate the query into a row-aligned boolean mask rather than
        # matching on index labels: label matching selects unrelated rows
        # whenever `obs_df.index` contains duplicated labels.
        mask = np.asarray(obs_df.eval(my_idx)).ravel()
        if mask.dtype != bool:
            raise ValueError(
                "String queries must evaluate to a boolean condition on adata.obs."
            )
        return mask

    def selects_nothing(selector):
        # A boolean mask is empty when no entry is set; an index array is
        # empty when it has no element.
        if selector.dtype == bool:
            return not selector.any()
        return selector.size == 0

    obs_df = adata.obs
    g1_key = "one"
    group1 = [g1_key]
    group2 = None if idx2 is None else "two"

    idx1 = ravel_idx(idx1, obs_df)
    # Only resolve (and validate) idx2 when it was actually provided.
    if idx2 is not None:
        idx2 = ravel_idx(idx2, obs_df)
    if selects_nothing(idx1) or (idx2 is not None and selects_nothing(idx2)):
        raise ValueError("One of idx1 or idx2 has size zero.")

    obs_col = np.array(["None"] * adata.shape[0], dtype=str)
    obs_col[idx1] = g1_key
    if idx2 is not None:
        obs_col[idx2] = group2
    return obs_col, group1, group2


def _de_core(
adata,
model_fn,
Expand Down Expand Up @@ -120,18 +168,10 @@ def _de_core(
# make a temp obs key using indices
temp_key = None
if idx1 is not None:
idx1 = np.asarray(idx1).ravel()
g1_key = "one"
obs_col = np.array(["None"] * adata.shape[0], dtype=str)
obs_col[idx1] = g1_key
group2 = None if idx2 is None else "two"
if idx2 is not None:
idx2 = np.asarray(idx2).ravel()
obs_col[idx2] = group2
obs_col, group1, group2 = _prepare_obs(idx1, idx2, adata)
temp_key = "_scvi_temp_de"
adata.obs[temp_key] = obs_col
groupby = temp_key
group1 = [g1_key]

df_results = []
dc = DifferentialComputation(model_fn, adata)
Expand Down
15 changes: 15 additions & 0 deletions tests/core/test_differential.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from scvi.data import synthetic_iid
from scvi.model import SCVI
from scvi.model.base._utils import _prepare_obs
from scvi.utils import DifferentialComputation


Expand Down Expand Up @@ -50,6 +51,20 @@ def m1_domain_fn_test(samples):
adata[adata.obs["labels"] == "label_1"], groupby="batch"
)

# Test query features
obs_col, group1, _, = _prepare_obs(
idx1="(labels == 'label_1') & (batch == 'batch_1')", idx2=None, adata=adata
)
assert (obs_col == group1).sum() == adata.obs.loc[
lambda x: (x.labels == "label_1") & (x.batch == "batch_1")
].shape[0]
model.differential_expression(
idx1="labels == 'label_1'",
)
model.differential_expression(
idx1="labels == 'label_1'", idx2="(labels == 'label_2') & (batch == 'batch_1')"
)

# test that ints as group work
a = synthetic_iid()
a.obs["test"] = [0] * 200 + [1] * 200
Expand Down

0 comments on commit 75eeb81

Please sign in to comment.