add mnn (#210)

* add mnn * Update config.vsh.yaml add documentation * Apply suggestions from code review * Apply suggestions from code review * Apply suggestions from code review see PR https://github.com/chriscainx/mnnpy/pull/48 * process comments * Update src/tasks/batch_integration/methods/mnn/script.py Co-authored-by: Robrecht Cannoodt <[email protected]> * rename method * update descriptions --------- Co-authored-by: Kai Waldrant <[email protected]> Co-authored-by: Robrecht Cannoodt <[email protected]>
openproblems-bio · Aug 9, 2023 · febb220 · febb220
1 parent 6a1f389
commit febb220
Show file tree

Hide file tree

Showing 2 changed files with 83 additions and 0 deletions.
diff --git a/src/tasks/batch_integration/methods/mnnpy/config.vsh.yaml b/src/tasks/batch_integration/methods/mnnpy/config.vsh.yaml
@@ -0,0 +1,52 @@
+# use method api spec
+__merge__: ../../api/comp_method_feature.yaml
+functionality:
+  name: mnnpy
+  info:
+    label: mnnpy
+    summary: "Batch effect correction by matching mutual nearest neighbors, Python implementation."
+    description: |
+      An implementation of MNN correct in python featuring low memory usage, full multicore support and compatibility with the scanpy framework.
+
+      Batch effect correction by matching mutual nearest neighbors (Haghverdi et al, 2018) has been implemented as a function 'mnnCorrect' in the R package scran. Sadly it's extremely slow for big datasets and doesn't make full use of the parallel architecture of modern CPUs.
+
+      This project is a python implementation of the MNN correct algorithm which takes advantage of python's extendability and hackability. It seamlessly integrates with the scanpy framework and has multicore support in its bones.
+    # Cite the MNN paper named in the description (Haghverdi et al., 2018);
+    # the previous key "hie2019efficient" did not match this method.
+    # NOTE(review): confirm "haghverdi2018batch" exists in the project bibliography.
+    reference: "haghverdi2018batch"
+    repository_url: "https://github.com/chriscainx/mnnpy"
+    documentation_url: "https://github.com/chriscainx/mnnpy#readme"
+    # Location of the earlier (v1) implementation this component was ported from.
+    v1:
+      path: openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py
+      commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf
+    # Default normalization; individual variants below may override it.
+    preferred_normalization: log_cpm
+    # Variants combine two switches: HVG subsetting (`hvg`, read by script.py)
+    # and scaled vs. unscaled input (`preferred_normalization`).
+    variants:
+      mnn_full_unscaled:
+      mnn_hvg_unscaled:
+        hvg: true
+      mnn_hvg_scaled:
+        hvg: true
+        preferred_normalization: log_cpm_scaled
+      mnn_full_scaled:
+        preferred_normalization: log_cpm_scaled
+  resources:
+    - type: python_script
+      path: script.py
+platforms:
+  - type: docker
+    image: python:3.8
+    setup:
+      - type: apt
+        packages:
+          - procps
+      - type: python
+        pypi:
+          - anndata~=0.8.0
+          - scanpy
+          - pyyaml
+          - requests
+          - jsonschema
+          - scib==1.1.3
+        # mnnpy itself is installed straight from its GitHub repository.
+        github:
+          - chriscainx/mnnpy
+  - type: nextflow
+    directives:
+      label: [ lowcpu, lowmem ]
diff --git a/src/tasks/batch_integration/methods/mnnpy/script.py b/src/tasks/batch_integration/methods/mnnpy/script.py
@@ -0,0 +1,31 @@
+import anndata as ad
+from scib.integration import mnn
+
+## VIASH START
+par = {
+    'input': 'resources_test/batch_integration/pancreas/unintegrated.h5ad',
+    'output': 'output.h5ad',
+    'hvg': True,
+}
+meta = {
+    'functionality_name': 'foo',
+    'config': 'bar'
+}
+## VIASH END
+
+print('Read input', flush=True)
+adata = ad.read_h5ad(par['input'])
+
+if par['hvg']:
+    print('Select HVGs', flush=True)
+    adata = adata[:, adata.var['hvg']]
+
+print('Run mnn', flush=True)
+adata.X = adata.layers['normalized']
+adata.layers['corrected_counts'] = mnn(adata, batch='batch').X
+
+del adata.X
+
+print("Store outputs", flush=True)
+adata.uns['method_id'] = meta['functionality_name']
+adata.write_h5ad(par['output'], compression='gzip')