openproblems-bio · scottgigante-immunai · Jul 13, 2022 · Mar 22, 2021 · Mar 22, 2021 · Mar 16, 2022
diff --git a/docker/openproblems-python-tf2.4/Dockerfile b/docker/openproblems-python-tf2.4/Dockerfile
@@ -0,0 +1,15 @@
+FROM singlecellopenproblems/openproblems:latest
+
+# Build-time defaults for the unprivileged user this image ends on.
+# NB_GID is declared but not referenced by any instruction in this file.
+ARG NB_USER="sagemaker-user"
+ARG NB_UID="1000"
+ARG NB_GID="100"
+
+# Switch to root for the package installation below.
+USER root
+WORKDIR /
+
+# Install Python packages
+# The source path is relative to the build context, which therefore has to
+# contain the docker/ directory.
+COPY ./docker/openproblems-python-tf2.4/requirements.txt ./requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Drop back to the unprivileged user and start in its home directory.
+USER $NB_UID
+WORKDIR /home/$NB_USER
diff --git a/docker/openproblems-python-tf2.4/README.md b/docker/openproblems-python-tf2.4/README.md
@@ -0,0 +1,14 @@
+# openproblems-python-tf2.4 Docker image
+
+Base image: singlecellopenproblems/openproblems
+
+OS: Debian Stretch
+
+Python: 3.8
+
+Python packages:
+
+* dca >=0.3,<0.4
+* keras >=2.4,<2.6
+* pyyaml ==5.4.1
+* tensorflow >=2.4,<2.5
diff --git a/docker/openproblems-python-tf2.4/requirements.txt b/docker/openproblems-python-tf2.4/requirements.txt
@@ -0,0 +1,4 @@
+dca==0.3.*
+keras>=2.4,<2.6  # pinned in dca
+pyyaml==5.4.1  # pinned in #431
+tensorflow==2.4.*  # pinned in dca
diff --git a/openproblems/tasks/denoising/README.md b/openproblems/tasks/denoising/README.md
@@ -8,10 +8,11 @@ A key challenge in evaluating denoising methods is the general lack of a ground
 
 # The metrics
 
-Metrics for data denoising aim to 
+Metrics for data denoising aim to assess denoising accuracy by comparing the denoised *training* set to the randomly sampled *test* set. Two comparisons have been implemented, *MSE* and *Poisson*, which penalize differences between the denoised *train* set and the *test* set under Gaussian or Poisson loss functions, respectively.
 
-* **TODO**: TODO
-* **TODO**: TODO
+The *MSE* metric multiplies the *denoised* data by the row sums of the *test* data and divides by the sum of the *train* data. The result is a normalized version of the *denoised* data, which is compared to the *test* data via the mean squared error (Gaussian loss).
+
+The *Poisson* metric multiplies the *denoised* data by the row sums of the *test* data and divides by the sum of the *train* data. The result is a normalized version of the *denoised* data, which is compared to the *test* data via the Poisson loss.
 
 ## API
 

diff --git a/openproblems/tasks/denoising/methods/__init__.py b/openproblems/tasks/denoising/methods/__init__.py
@@ -1,4 +1,5 @@
 from .alra import alra
+from .dca import dca
 from .magic import magic
 from .magic import magic_approx
 from .no_denoising import no_denoising
diff --git a/openproblems/tasks/denoising/methods/dca.py b/openproblems/tasks/denoising/methods/dca.py
@@ -0,0 +1,40 @@
+from ....tools.decorators import method
+from ....tools.utils import check_version
+
+import numpy as np
+import scanpy as sc
+
+
+def _dca(adata, test=False, epochs=None):
+    if test:
+        epochs = 30
+    else:
+        epochs = epochs or 300
+    from dca.api import dca
+
+    # find all-zero genes (columns)
+    gene_sums = np.asarray(adata.obsm["train"].sum(axis=0)).flatten()
+    is_missing = gene_sums == 0
+    # make adata object with train counts
+    adata2 = sc.AnnData(adata.obsm["train"])
+    # mask all-zero genes
+    adata2.X[:, is_missing] = 1
+    # run DCA
+    dca(adata2, epochs=epochs)
+    adata.obsm["denoised"] = adata2.X  # adata2.X should call the count matrix of DCA.
+    # return masked values to zero
+    adata.obsm["denoised"][:.is_missing] = 0
+    adata.uns["method_code_version"] = check_version("dca")
+    return adata
+
+
+@method(
+    method_name="DCA",
+    paper_name="Single-cell RNA-seq denoising using...",
+    paper_url="https://www.nature.com/articles/s41467-018-07931-2",
+    paper_year=2019,
+    code_url="https://github.com/theislab/dca",
+    image="openproblems-python-tf2.4",
+)
+def dca(adata, test=False, epochs=None):
+    return _dca(adata, test=test, epochs=epochs)