From 12f54cfbbfacafc618ac09dee819001308e8858c Mon Sep 17 00:00:00 2001 From: Kai Waldrant Date: Fri, 25 Aug 2023 12:39:38 +0200 Subject: [PATCH] Update/integrate last v1 changes (#214) * update migration scripts * update batch_int * update bat_int clus_overlap * update denoising * add cp10k norm * update schema * update label_projection * [WIP] spectral_features new control_method * add diffusion map method * update dim_red spectral_feature and diffu_map * update dim_red * generalise cp normalization * CPM -> CP10k * fix failing test * fix typo * update and rename rmse to distance correlation * set CP normalization from cpm to cp10k * fix typo cpm to cp * updated changelog --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 8 +- src/common/comp_tests/check_method_config.py | 2 +- src/common/create_component/script.py | 2 +- src/common/schemas/defs_common.yaml | 2 +- .../normalization/log_cp/config.vsh.yaml | 22 ++++++ .../{log_cpm => log_cp}/script.py | 11 +-- .../normalization/log_cpm/config.vsh.yaml | 13 ---- .../{sqrt_cpm => sqrt_cp}/config.vsh.yaml | 11 ++- .../{sqrt_cpm => sqrt_cp}/script.py | 9 ++- src/datasets/processors/pca/script.py | 2 +- .../resource_test_scripts/multimodal.sh | 4 +- .../resource_test_scripts/pancreas.sh | 4 +- .../workflows/process_openproblems_v1/main.nf | 8 +- src/migration/check_migration.sh | 14 ++++ .../check_migration_status/script.py | 16 +++- .../no_integration_batch/config.vsh.yaml | 2 +- .../random_embed_cell/config.vsh.yaml | 2 +- .../random_embed_cell_jitter/config.vsh.yaml | 2 +- .../random_integration/config.vsh.yaml | 2 +- .../methods/bbknn/config.vsh.yaml | 6 +- .../methods/combat/config.vsh.yaml | 6 +- .../methods/fastmnn/config.vsh.yaml | 2 +- .../methods/mnn_correct/config.vsh.yaml | 2 +- .../methods/mnnpy/config.vsh.yaml | 4 +- .../methods/scanorama_embed/config.vsh.yaml | 6 +- .../methods/scanorama_feature/config.vsh.yaml | 6 +- .../methods/scanvi/config.vsh.yaml | 2 +- .../methods/scvi/config.vsh.yaml | 4 +- .../metrics/asw_batch/config.vsh.yaml | 2 +- .../metrics/asw_label/config.vsh.yaml | 2 +- .../cell_cycle_conservation/config.vsh.yaml | 2 +- .../clustering_overlap/config.vsh.yaml | 4 +- .../metrics/pcr/config.vsh.yaml | 2 +- .../batch_integration/workflows/run/main.nf | 2 +- .../no_denoising/config.vsh.yaml | 2 +- .../perfect_denoising/config.vsh.yaml | 2 +- .../denoising/methods/alra/config.vsh.yaml | 2 +- .../denoising/methods/dca/config.vsh.yaml | 2 +- .../methods/knn_smoothing/config.vsh.yaml | 2 +- .../denoising/methods/magic/config.vsh.yaml | 4 +- .../denoising/metrics/mse/config.vsh.yaml | 4 +- .../denoising/metrics/poisson/config.vsh.yaml | 5 +- src/tasks/denoising/workflows/run/main.nf | 2 +- src/tasks/denoising/workflows/run/run_test.sh | 2 +- .../workflows/run/run_test_on_tower.sh | 2 +- .../random_features/config.vsh.yaml | 2 +- .../spectral_features/config.vsh.yaml | 41 ++++++++++ .../true_features/config.vsh.yaml | 23 +----- .../control_methods/true_features/script.py | 12 +-- .../methods/densmap/config.vsh.yaml | 12 +-- .../methods/diffusion_map/config.vsh.yaml | 44 +++++++++++ .../methods/diffusion_map/script.py | 77 +++++++++++++++++++ .../methods/ivis/config.vsh.yaml | 4 +- .../methods/neuralee/config.vsh.yaml | 6 +- .../methods/pca/config.vsh.yaml | 8 +- .../methods/phate/config.vsh.yaml | 12 +-- .../methods/tsne/config.vsh.yaml | 8 +- .../methods/umap/config.vsh.yaml | 10 +-- .../metrics/coranking/config.vsh.yaml | 14 ++-- .../metrics/coranking/library.bib | 62 --------------- 
.../density_preservation/config.vsh.yaml | 11 ++- .../metrics/density_preservation/script.py | 25 +++--- .../distance_correlation/config.vsh.yaml | 49 ++++++++++++ .../{rmse => distance_correlation}/script.py | 23 +++--- .../metrics/rmse/config.vsh.yaml | 45 ----------- .../metrics/trustworthiness/config.vsh.yaml | 2 +- .../resources_test_scripts/pancreas.sh | 4 +- .../workflows/run/main.nf | 2 +- .../workflows/run/run_test.sh | 2 +- .../workflows/run/run_test_on_tower.sh | 2 +- .../majority_vote/config.vsh.yaml | 2 +- .../random_labels/config.vsh.yaml | 2 +- .../true_labels/config.vsh.yaml | 2 +- .../methods/knn/config.vsh.yaml | 6 +- .../logistic_regression/config.vsh.yaml | 6 +- .../methods/mlp/config.vsh.yaml | 6 +- .../methods/scanvi/config.vsh.yaml | 4 +- .../methods/scanvi_scarches/config.vsh.yaml | 4 + .../seurat_transferdata/config.vsh.yaml | 4 +- .../methods/xgboost/config.vsh.yaml | 6 +- .../metrics/accuracy/config.vsh.yaml | 2 +- .../metrics/f1/config.vsh.yaml | 6 +- .../resources_test_scripts/pancreas.sh | 4 +- .../label_projection/workflows/run/main.nf | 2 +- .../workflows/run/run_test.sh | 2 +- .../workflows/run/run_test_on_tower.sh | 2 +- 86 files changed, 455 insertions(+), 322 deletions(-) create mode 100644 src/datasets/normalization/log_cp/config.vsh.yaml rename src/datasets/normalization/{log_cpm => log_cp}/script.py (73%) delete mode 100644 src/datasets/normalization/log_cpm/config.vsh.yaml rename src/datasets/normalization/{sqrt_cpm => sqrt_cp}/config.vsh.yaml (52%) rename src/datasets/normalization/{sqrt_cpm => sqrt_cp}/script.py (80%) create mode 100644 src/migration/check_migration.sh create mode 100644 src/tasks/dimensionality_reduction/control_methods/spectral_features/config.vsh.yaml create mode 100644 src/tasks/dimensionality_reduction/methods/diffusion_map/config.vsh.yaml create mode 100644 src/tasks/dimensionality_reduction/methods/diffusion_map/script.py delete mode 100644 src/tasks/dimensionality_reduction/metrics/coranking/library.bib create mode 100644 src/tasks/dimensionality_reduction/metrics/distance_correlation/config.vsh.yaml rename src/tasks/dimensionality_reduction/metrics/{rmse => distance_correlation}/script.py (70%) delete mode 100644 src/tasks/dimensionality_reduction/metrics/rmse/config.vsh.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index c63820536b..4c84ff9790 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ ## general +### NEW FUNCTIONALITY + +* Updated all current tasks in v2 to latest changes in OP v1 (PR #214) + ### MAJOR CHANGES * Relocate task directories to new `src/tasks/` location (PR #142). @@ -11,6 +15,8 @@ and `ghcr.io/openproblems-bio/base-r` (PR #168). * Update batch integration docker images to OpenProblems base images (PR #171). + +* Changed default normalization CPM to CP10k (PR #214) ### MINOR CHANGES @@ -274,7 +280,7 @@ * `methods/neuralee`: Migrated from v1. -* `metrics/rmse`: Migrated from v1, but will likely be removed. +* `metrics/distance_correlation`: Migrated from v1, but will likely be removed. * `metrics/trustworthiness`: Migrated from v1, but will likely be removed. 
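For orientation before the component diffs: the CPM -> CP10k switch noted in the changelog above comes down to a single parameter. The renamed `log_cp` component (further down in this patch) generalizes the old hard-coded `target_sum=1e6` into an `--n_cp` argument, so one script yields `log_cpm` (1e6) or `log_cp10k` (1e4). A minimal sketch of that logic, assuming scanpy's `sc.pp.normalize_total` and `sc.pp.log1p` APIs; the `par` dict and input path mirror the component's test block and are illustrative only:

    import anndata as ad
    import scanpy as sc

    # Illustrative parameters; 1e4 -> log_cp10k, 1e6 -> log_cpm.
    par = {
        "input": "resources_test/common/pancreas/dataset.h5ad",  # hypothetical test file
        "n_cp": 1e4,
        "norm_id": "log_cp10k",
    }

    adata = ad.read_h5ad(par["input"])

    # Scale each cell's counts to sum to n_cp, then log1p-transform.
    norm = sc.pp.normalize_total(
        adata, target_sum=par["n_cp"], layer="counts", inplace=False
    )
    adata.layers[par["norm_id"]] = sc.pp.log1p(norm["X"])
    adata.obs[par["norm_id"] + "_size_factors"] = norm["norm_factor"]
    adata.uns["normalization_id"] = par["norm_id"]

The same pattern applies to the renamed `sqrt_cp` component, which takes the square root of the scaled counts instead of `log1p`.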
diff --git a/src/common/comp_tests/check_method_config.py b/src/common/comp_tests/check_method_config.py index ecbb2dbaf2..61a2bf0f6f 100644 --- a/src/common/comp_tests/check_method_config.py +++ b/src/common/comp_tests/check_method_config.py @@ -98,7 +98,7 @@ def search_ref_bib(reference): assert arg_id in arg_names, f"Argument '{arg_id}' in `.functionality.info.variants['{paramset_id}']` is not an argument in `.functionality.arguments`." assert "preferred_normalization" in info, "preferred_normalization not an info field" -norm_methods = ["log_cpm", "counts", "log_scran_pooling", "sqrt_cpm", "l1_sqrt"] +norm_methods = ["log_cpm", "log_cp10k", "counts", "log_scran_pooling", "sqrt_cpm", "sqrt_cp10k", "l1_sqrt"] assert info["preferred_normalization"] in norm_methods, "info['preferred_normalization'] not one of '" + "', '".join(norm_methods) + "'." diff --git a/src/common/create_component/script.py b/src/common/create_component/script.py index 1bc6d97cc5..1c7de0010c 100644 --- a/src/common/create_component/script.py +++ b/src/common/create_component/script.py @@ -80,7 +80,7 @@ def generate_info(par, component_type, pretty_name) -> str: | description: | | FILL IN: A (multi-line) description of how this method works. | # Which normalisation method this component prefers to use (required). - | preferred_normalization: log_cpm + | preferred_normalization: log_cp10k |''') if component_type == "method": str += strip_margin(f'''\ diff --git a/src/common/schemas/defs_common.yaml b/src/common/schemas/defs_common.yaml index 0032c0e1c6..a069d5cc35 100644 --- a/src/common/schemas/defs_common.yaml +++ b/src/common/schemas/defs_common.yaml @@ -59,7 +59,7 @@ definitions: required: [ type ] additionalProperties: false PreferredNormalization: - enum: [l1_sqrt, log_cpm, log_scran_pooling, sqrt_cpm, counts] + enum: [l1_sqrt, log_cpm, log_cp10k, log_scran_pooling, sqrt_cpm, sqrt_cp10k, counts] description: | Which normalization method a component prefers. diff --git a/src/datasets/normalization/log_cp/config.vsh.yaml b/src/datasets/normalization/log_cp/config.vsh.yaml new file mode 100644 index 0000000000..4d1770f2c4 --- /dev/null +++ b/src/datasets/normalization/log_cp/config.vsh.yaml @@ -0,0 +1,22 @@ +__merge__: ../../api/comp_normalization.yaml +functionality: + name: "log_cp" + description: "Normalize data using Log CP" + resources: + - type: python_script + path: script.py + arguments: + - name: "--n_cp" + type: integer + default: 1e4 + description: "Number of counts per cell" + - name: "--norm_id" + type: string + default: log_cp10k + description: "normalization ID to use e.g. 
1e6 -> log_cpm, 1e4 -> log_cp10k" +platforms: + - type: docker + image: ghcr.io/openproblems-bio/base_python:1.0.1 + - type: nextflow + directives: + label: [ lowmem, lowcpu ] diff --git a/src/datasets/normalization/log_cpm/script.py b/src/datasets/normalization/log_cp/script.py similarity index 73% rename from src/datasets/normalization/log_cpm/script.py rename to src/datasets/normalization/log_cp/script.py index 6a28cbcc22..0fadc2ffe4 100644 --- a/src/datasets/normalization/log_cpm/script.py +++ b/src/datasets/normalization/log_cp/script.py @@ -4,11 +4,12 @@ par = { 'input': "resources_test/common/pancreas/dataset.h5ad", 'output': "output.h5ad", - 'layer_output': "log_cpm", - 'obs_size_factors': "log_cpm_size_factors" + 'layer_output': "log_cp10k", + 'obs_size_factors': "log_cp10k_size_factors", + 'n_cp': 1e6, } meta = { - "functionality_name": "normalize_log_cpm" + "functionality_name": "normalize_log_cp10k" } ## VIASH END @@ -18,7 +19,7 @@ print(">> Normalize data", flush=True) norm = sc.pp.normalize_total( adata, - target_sum=1e6, + target_sum=par["n_cp"], layer="counts", inplace=False ) @@ -27,7 +28,7 @@ print(">> Store output in adata", flush=True) adata.layers[par["layer_output"]] = lognorm adata.obs[par["obs_size_factors"]] = norm["norm_factor"] -adata.uns["normalization_id"] = meta["functionality_name"] +adata.uns["normalization_id"] = par["norm_id"] print(">> Write data", flush=True) adata.write_h5ad(par['output'], compression="gzip") diff --git a/src/datasets/normalization/log_cpm/config.vsh.yaml b/src/datasets/normalization/log_cpm/config.vsh.yaml deleted file mode 100644 index 631bdbae10..0000000000 --- a/src/datasets/normalization/log_cpm/config.vsh.yaml +++ /dev/null @@ -1,13 +0,0 @@ -__merge__: ../../api/comp_normalization.yaml -functionality: - name: "log_cpm" - description: "Normalize data using Log CPM" - resources: - - type: python_script - path: script.py -platforms: - - type: docker - image: ghcr.io/openproblems-bio/base_python:1.0.1 - - type: nextflow - directives: - label: [ lowmem, lowcpu ] diff --git a/src/datasets/normalization/sqrt_cpm/config.vsh.yaml b/src/datasets/normalization/sqrt_cp/config.vsh.yaml similarity index 52% rename from src/datasets/normalization/sqrt_cpm/config.vsh.yaml rename to src/datasets/normalization/sqrt_cp/config.vsh.yaml index dcf0b36b64..a347ec01d0 100644 --- a/src/datasets/normalization/sqrt_cpm/config.vsh.yaml +++ b/src/datasets/normalization/sqrt_cp/config.vsh.yaml @@ -1,10 +1,19 @@ __merge__: ../../api/comp_normalization.yaml functionality: - name: "sqrt_cpm" + name: "sqrt_cp" description: "Normalize data using Log Sqrt" resources: - type: python_script path: script.py + arguments: + - name: "--n_cp" + type: integer + default: 1e4 + description: "Number of counts per cell" + - name: "--norm_id" + type: string + default: sqrt_cp10k + description: "normalization id to use e.g. 
1e4 -> sqrt_cp10k, 1e6 -> sqrt_cpm" platforms: - type: docker image: ghcr.io/openproblems-bio/base_python:1.0.1 diff --git a/src/datasets/normalization/sqrt_cpm/script.py b/src/datasets/normalization/sqrt_cp/script.py similarity index 80% rename from src/datasets/normalization/sqrt_cpm/script.py rename to src/datasets/normalization/sqrt_cp/script.py index f99227f3c9..af30b56083 100644 --- a/src/datasets/normalization/sqrt_cpm/script.py +++ b/src/datasets/normalization/sqrt_cp/script.py @@ -6,7 +6,8 @@ 'input': "resources_test/common/pancreas/dataset.h5ad", 'output': "output.h5ad", 'layer_output': "sqrt_cpm", - 'obs_size_factors': "size_factors_sqrt_cpm" + 'obs_size_factors': "size_factors_sqrt_cpm", + 'n_cp': 1e6, } meta = { "functionality_name": "normalize_sqrt_cpm" @@ -19,16 +20,16 @@ print(">> Normalize data", flush=True) norm = sc.pp.normalize_total( adata, - target_sum=1e6, + target_sum=par['n_cp'], layer="counts", inplace=False ) -lognorm = np.sqrt(norm["X"]) +lognorm = np.sqrt(norm['X']) print(">> Store output in adata", flush=True) adata.layers[par["layer_output"]] = lognorm adata.obs[par["obs_size_factors"]] = norm["norm_factor"] -adata.uns["normalization_id"] = meta["functionality_name"] +adata.uns["normalization_id"] = par["norm_id"] print(">> Write data", flush=True) adata.write_h5ad(par['output'], compression="gzip") diff --git a/src/datasets/processors/pca/script.py b/src/datasets/processors/pca/script.py index ffc89c34c0..0990b97374 100644 --- a/src/datasets/processors/pca/script.py +++ b/src/datasets/processors/pca/script.py @@ -4,7 +4,7 @@ ### VIASH START par = { 'input': 'resources_test/common/pancreas/dataset.h5ad', - 'layer_input': 'log_cpm', + 'layer_input': 'log_cp10k', 'output': 'dataset.h5ad', 'obsm_embedding': 'X_pca', 'varm_loadings': 'pca_loadings', diff --git a/src/datasets/resource_test_scripts/multimodal.sh b/src/datasets/resource_test_scripts/multimodal.sh index fe0e9c472b..364efbf3ad 100644 --- a/src/datasets/resource_test_scripts/multimodal.sh +++ b/src/datasets/resource_test_scripts/multimodal.sh @@ -43,12 +43,12 @@ viash run src/datasets/processors/subsample/config.vsh.yaml -- \ # run sqrt cpm normalisation on mod 1 file -viash run src/datasets/normalization/log_cpm/config.vsh.yaml -- \ +viash run src/datasets/normalization/sqrt_cp/config.vsh.yaml -- \ --input $DATASET_DIR/raw_mod1.h5ad \ --output $DATASET_DIR/normalized_mod1.h5ad # run log cpm normalisation on mod 2 file -viash run src/datasets/normalization/log_cpm/config.vsh.yaml -- \ +viash run src/datasets/normalization/log_cp/config.vsh.yaml -- \ --input $DATASET_DIR/raw_mod2.h5ad \ --output $DATASET_DIR/normalized_mod2.h5ad diff --git a/src/datasets/resource_test_scripts/pancreas.sh b/src/datasets/resource_test_scripts/pancreas.sh index e78f738649..9a49f7c7de 100755 --- a/src/datasets/resource_test_scripts/pancreas.sh +++ b/src/datasets/resource_test_scripts/pancreas.sh @@ -42,8 +42,8 @@ viash run src/datasets/processors/subsample/config.vsh.yaml -- \ --output $DATASET_DIR/raw.h5ad \ --seed 123 -# run log cpm normalisation -viash run src/datasets/normalization/log_cpm/config.vsh.yaml -- \ +# run log cp10k normalisation +viash run src/datasets/normalization/log_cp/config.vsh.yaml -- \ --input $DATASET_DIR/raw.h5ad \ --output $DATASET_DIR/normalized.h5ad diff --git a/src/datasets/workflows/process_openproblems_v1/main.nf b/src/datasets/workflows/process_openproblems_v1/main.nf index aa6e0f4243..dfca3e8b49 100644 --- a/src/datasets/workflows/process_openproblems_v1/main.nf +++ 
b/src/datasets/workflows/process_openproblems_v1/main.nf
@@ -7,9 +7,9 @@ targetDir = params.rootDir + "/target/nextflow"
 include { openproblems_v1 } from "$targetDir/datasets/loaders/openproblems_v1/main.nf"
 
 // normalization methods
-include { log_cpm } from "$targetDir/datasets/normalization/log_cpm/main.nf"
+include { log_cp } from "$targetDir/datasets/normalization/log_cp/main.nf"
 include { log_scran_pooling } from "$targetDir/datasets/normalization/log_scran_pooling/main.nf"
-include { sqrt_cpm } from "$targetDir/datasets/normalization/sqrt_cpm/main.nf"
+include { sqrt_cp } from "$targetDir/datasets/normalization/sqrt_cp/main.nf"
 include { l1_sqrt } from "$targetDir/datasets/normalization/l1_sqrt/main.nf"
 
 // dataset processors
@@ -27,8 +27,8 @@ config = readConfig("$projectDir/config.vsh.yaml")
 // add custom tracer to nextflow to capture exit codes, memory usage, cpu usage, etc.
 traces = initialize_tracer()
 
-// normalization_methods = [log_cpm, log_scran_pooling, sqrt_cpm, l1_sqrt
-normalization_methods = [log_cpm, sqrt_cpm, l1_sqrt]
+// normalization_methods = [log_cp, log_scran_pooling, sqrt_cp, l1_sqrt
+normalization_methods = [log_cp, sqrt_cp, l1_sqrt]
 
 workflow {
   helpMessage(config)
diff --git a/src/migration/check_migration.sh b/src/migration/check_migration.sh
new file mode 100644
index 0000000000..1ce39634f2
--- /dev/null
+++ b/src/migration/check_migration.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+# viash run src/common/get_git_sha/config.vsh.yaml -p native -- --input /home/kai/Documents/openroblems/openproblems --output output/op_git_sha.json
+
+TASK_IDS=`ls src/tasks`
+
+for task_id in $TASK_IDS; do
+  echo ">> Processing $task_id"
+  viash run src/common/get_method_info/config.vsh.yaml -- --input . --task_id $task_id --output output/${task_id}_method.json
+  viash run src/migration/check_migration_status/config.vsh.yaml -p native -- --git_sha resources_test/input_git_sha.json --comp_info output/${task_id}_method.json --output output/${task_id}_method_status.json
+  viash run src/common/get_metric_info/config.vsh.yaml -- --input .
--task_id $task_id --output output/${task_id}_metric.json + viash run src/migration/check_migration_status/config.vsh.yaml -p native -- --git_sha resources_test/input_git_sha.json --comp_info output/${task_id}_metric.json --output output/${task_id}_metric_status.json + +done \ No newline at end of file diff --git a/src/migration/check_migration_status/script.py b/src/migration/check_migration_status/script.py index 86d0a2ba46..6e88b2d9ed 100644 --- a/src/migration/check_migration_status/script.py +++ b/src/migration/check_migration_status/script.py @@ -3,9 +3,9 @@ ## VIASH START par = { - 'git_sha': 'temp/openproblems-v1.json', - 'comp_info': 'temp/denoising_metrics.json', - 'output': 'temp/migration_status.json' + 'git_sha': 'resources_test/input_git_sha.json', + 'comp_info': 'output/denoising_metric.json', + 'output': 'output/denoising_metric_status.json' } ## VIASH END @@ -16,10 +16,18 @@ def check_status(comp_item: List[Dict[str, str]], git_objects: List[Dict[str, st git_object["sha"].""" v1_path = comp_item.get("v1", {}).get("path") + + if "metric_id" in comp_item: + v1_path = comp_item.get("v1.path") + if not v1_path: return "v1.path missing" v1_commit = comp_item.get("v1", {}).get("commit") + + if "metric_id" in comp_item: + v1_commit = comp_item.get("v1.commit") + if not v1_commit: return "v1.commit missing" @@ -28,7 +36,7 @@ def check_status(comp_item: List[Dict[str, str]], git_objects: List[Dict[str, st return "v1.path does not exist in git repo" git_sha = git_object[0]["sha"] - if git_sha == comp_item["v1_commit"]: + if git_sha == v1_commit: return "up to date" else: return f"out of date (sha: {git_sha})" diff --git a/src/tasks/batch_integration/control_methods/no_integration_batch/config.vsh.yaml b/src/tasks/batch_integration/control_methods/no_integration_batch/config.vsh.yaml index da3013e908..b57dbb1cf9 100644 --- a/src/tasks/batch_integration/control_methods/no_integration_batch/config.vsh.yaml +++ b/src/tasks/batch_integration/control_methods/no_integration_batch/config.vsh.yaml @@ -9,7 +9,7 @@ functionality: v1: path: openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 - preferred_normalization: log_cpm + preferred_normalization: log_cp10k resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/control_methods/random_embed_cell/config.vsh.yaml b/src/tasks/batch_integration/control_methods/random_embed_cell/config.vsh.yaml index f6f6e89a56..a4ea2c49b8 100644 --- a/src/tasks/batch_integration/control_methods/random_embed_cell/config.vsh.yaml +++ b/src/tasks/batch_integration/control_methods/random_embed_cell/config.vsh.yaml @@ -9,7 +9,7 @@ functionality: v1: path: openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 - preferred_normalization: log_cpm + preferred_normalization: log_cp10k resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/control_methods/random_embed_cell_jitter/config.vsh.yaml b/src/tasks/batch_integration/control_methods/random_embed_cell_jitter/config.vsh.yaml index 3e4a0fc924..faf4c6f702 100644 --- a/src/tasks/batch_integration/control_methods/random_embed_cell_jitter/config.vsh.yaml +++ b/src/tasks/batch_integration/control_methods/random_embed_cell_jitter/config.vsh.yaml @@ -9,7 +9,7 @@ functionality: v1: path: openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py commit: 
b3456fd73c04c28516f6df34c57e6e3e8b0dab32 - preferred_normalization: log_cpm + preferred_normalization: log_cp10k arguments: - name: "--jitter" type: double diff --git a/src/tasks/batch_integration/control_methods/random_integration/config.vsh.yaml b/src/tasks/batch_integration/control_methods/random_integration/config.vsh.yaml index a9e0884ca5..9b43f82aea 100644 --- a/src/tasks/batch_integration/control_methods/random_integration/config.vsh.yaml +++ b/src/tasks/batch_integration/control_methods/random_integration/config.vsh.yaml @@ -9,7 +9,7 @@ functionality: v1: path: openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 - preferred_normalization: log_cpm + preferred_normalization: log_cp10k resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/methods/bbknn/config.vsh.yaml b/src/tasks/batch_integration/methods/bbknn/config.vsh.yaml index 129bac5cbf..742616c743 100644 --- a/src/tasks/batch_integration/methods/bbknn/config.vsh.yaml +++ b/src/tasks/batch_integration/methods/bbknn/config.vsh.yaml @@ -15,12 +15,12 @@ functionality: documentation_url: "https://github.com/Teichlab/bbknn#readme" v1: path: openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf - preferred_normalization: log_cpm + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + preferred_normalization: log_cp10k variants: bbknn_full_unscaled: bbknn_full_scaled: - preferred_normalization: log_cpm_scaled + preferred_normalization: log_cp10k_scaled resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/methods/combat/config.vsh.yaml b/src/tasks/batch_integration/methods/combat/config.vsh.yaml index 4e01dfb1ec..0314e42438 100644 --- a/src/tasks/batch_integration/methods/combat/config.vsh.yaml +++ b/src/tasks/batch_integration/methods/combat/config.vsh.yaml @@ -18,12 +18,12 @@ functionality: documentation_url: "https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html" v1: path: openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf - preferred_normalization: log_cpm + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + preferred_normalization: log_cp10k variants: combat_full_unscaled: combat_full_scaled: - preferred_normalization: log_cpm_scaled + preferred_normalization: log_cp10k_scaled resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/methods/fastmnn/config.vsh.yaml b/src/tasks/batch_integration/methods/fastmnn/config.vsh.yaml index a20640b119..b1ea4bec9e 100644 --- a/src/tasks/batch_integration/methods/fastmnn/config.vsh.yaml +++ b/src/tasks/batch_integration/methods/fastmnn/config.vsh.yaml @@ -17,7 +17,7 @@ functionality: reference: "haghverdi2018batch" repository_url: "https://code.bioconductor.org/browse/batchelor/" documentation_url: "https://bioconductor.org/packages/batchelor/" - preferred_normalization: log_cpm + preferred_normalization: log_cp10k resources: - type: r_script path: script.R diff --git a/src/tasks/batch_integration/methods/mnn_correct/config.vsh.yaml b/src/tasks/batch_integration/methods/mnn_correct/config.vsh.yaml index 12c3b5ef52..15f30ec456 100644 --- a/src/tasks/batch_integration/methods/mnn_correct/config.vsh.yaml +++ b/src/tasks/batch_integration/methods/mnn_correct/config.vsh.yaml @@ -11,7 +11,7 @@ functionality: reference: "haghverdi2018batch" repository_url: 
"https://code.bioconductor.org/browse/batchelor/" documentation_url: "https://bioconductor.org/packages/batchelor/" - preferred_normalization: log_cpm + preferred_normalization: log_cp10k resources: - type: r_script path: script.R diff --git a/src/tasks/batch_integration/methods/mnnpy/config.vsh.yaml b/src/tasks/batch_integration/methods/mnnpy/config.vsh.yaml index 47123c7372..5fdf1f0a8b 100644 --- a/src/tasks/batch_integration/methods/mnnpy/config.vsh.yaml +++ b/src/tasks/batch_integration/methods/mnnpy/config.vsh.yaml @@ -17,11 +17,11 @@ functionality: v1: path: openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf - preferred_normalization: log_cpm + preferred_normalization: log_cp10k variants: mnn_full_unscaled: mnn_full_scaled: - preferred_normalization: log_cpm_scaled + preferred_normalization: log_cp10k_scaled resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/methods/scanorama_embed/config.vsh.yaml b/src/tasks/batch_integration/methods/scanorama_embed/config.vsh.yaml index ae4de238f1..654e8c6e25 100644 --- a/src/tasks/batch_integration/methods/scanorama_embed/config.vsh.yaml +++ b/src/tasks/batch_integration/methods/scanorama_embed/config.vsh.yaml @@ -13,12 +13,12 @@ functionality: documentation_url: "https://github.com/brianhie/scanorama#readme" v1: path: openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf - preferred_normalization: log_cpm + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + preferred_normalization: log_cp10k variants: scanorama_embed_full_unscaled: scanorama_embed_full_scaled: - preferred_normalization: log_cpm_scaled + preferred_normalization: log_cp10k_scaled resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/methods/scanorama_feature/config.vsh.yaml b/src/tasks/batch_integration/methods/scanorama_feature/config.vsh.yaml index 43b5e10062..b144b0e788 100644 --- a/src/tasks/batch_integration/methods/scanorama_feature/config.vsh.yaml +++ b/src/tasks/batch_integration/methods/scanorama_feature/config.vsh.yaml @@ -13,12 +13,12 @@ functionality: documentation_url: "https://github.com/brianhie/scanorama#readme" v1: path: openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf - preferred_normalization: log_cpm + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + preferred_normalization: log_cp10k variants: scanorama_feature_full_unscaled: scanorama_feature_full_scaled: - preferred_normalization: log_cpm_scaled + preferred_normalization: log_cp10k_scaled resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/methods/scanvi/config.vsh.yaml b/src/tasks/batch_integration/methods/scanvi/config.vsh.yaml index 82f75714b8..41182a651c 100644 --- a/src/tasks/batch_integration/methods/scanvi/config.vsh.yaml +++ b/src/tasks/batch_integration/methods/scanvi/config.vsh.yaml @@ -25,7 +25,7 @@ functionality: v1: path: openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf - preferred_normalization: log_cpm + preferred_normalization: log_cp10k variants: scanvi_full_unscaled: resources: diff --git a/src/tasks/batch_integration/methods/scvi/config.vsh.yaml b/src/tasks/batch_integration/methods/scvi/config.vsh.yaml index 75f1bcf6e5..d1bf368aa8 100644 --- 
a/src/tasks/batch_integration/methods/scvi/config.vsh.yaml +++ b/src/tasks/batch_integration/methods/scvi/config.vsh.yaml @@ -12,8 +12,8 @@ functionality: documentation_url: "https://github.com/YosefLab/scvi-tools#readme" v1: path: openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf - preferred_normalization: log_cpm + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + preferred_normalization: log_cp10k variants: scvi_full_unscaled: resources: diff --git a/src/tasks/batch_integration/metrics/asw_batch/config.vsh.yaml b/src/tasks/batch_integration/metrics/asw_batch/config.vsh.yaml index dbf6d97f4d..f265b058d8 100644 --- a/src/tasks/batch_integration/metrics/asw_batch/config.vsh.yaml +++ b/src/tasks/batch_integration/metrics/asw_batch/config.vsh.yaml @@ -32,7 +32,7 @@ functionality: maximize: true v1: path: openproblems/tasks/_batch_integration/batch_integration_embed/metrics/sil_batch.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/metrics/asw_label/config.vsh.yaml b/src/tasks/batch_integration/metrics/asw_label/config.vsh.yaml index 50435d3ce6..6a5babce30 100644 --- a/src/tasks/batch_integration/metrics/asw_label/config.vsh.yaml +++ b/src/tasks/batch_integration/metrics/asw_label/config.vsh.yaml @@ -20,7 +20,7 @@ functionality: maximize: true v1: path: openproblems/tasks/_batch_integration/batch_integration_embed/metrics/silhouette.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/metrics/cell_cycle_conservation/config.vsh.yaml b/src/tasks/batch_integration/metrics/cell_cycle_conservation/config.vsh.yaml index 95fb0804d4..69849dfc4b 100644 --- a/src/tasks/batch_integration/metrics/cell_cycle_conservation/config.vsh.yaml +++ b/src/tasks/batch_integration/metrics/cell_cycle_conservation/config.vsh.yaml @@ -29,7 +29,7 @@ functionality: maximize: true v1: path: openproblems/tasks/_batch_integration/batch_integration_embed/metrics/cc_score.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/metrics/clustering_overlap/config.vsh.yaml b/src/tasks/batch_integration/metrics/clustering_overlap/config.vsh.yaml index 9e6558df6a..98ed7e3662 100644 --- a/src/tasks/batch_integration/metrics/clustering_overlap/config.vsh.yaml +++ b/src/tasks/batch_integration/metrics/clustering_overlap/config.vsh.yaml @@ -24,7 +24,7 @@ functionality: maximize: true v1: path: openproblems/tasks/_batch_integration/batch_integration_graph/metrics/ari.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 - name: nmi label: NMI summary: "NMI compares overlap by scaling using mean entropy terms and optimizing Louvain clustering to obtain the best match between clusters and labels." 
@@ -43,7 +43,7 @@ functionality: maximize: true v1: path: openproblems/tasks/_batch_integration/batch_integration_graph/metrics/nmi.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 resources: - type: python_script path: script.py diff --git a/src/tasks/batch_integration/metrics/pcr/config.vsh.yaml b/src/tasks/batch_integration/metrics/pcr/config.vsh.yaml index 68704855a0..b043c2cd47 100644 --- a/src/tasks/batch_integration/metrics/pcr/config.vsh.yaml +++ b/src/tasks/batch_integration/metrics/pcr/config.vsh.yaml @@ -23,7 +23,7 @@ functionality: reference: luecken2022benchmarking v1: path: openproblems/tasks/_batch_integration/batch_integration_embed/metrics/pcr.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 min: 0 max: 1 maximize: true diff --git a/src/tasks/batch_integration/workflows/run/main.nf b/src/tasks/batch_integration/workflows/run/main.nf index d79e51a705..942878fd94 100644 --- a/src/tasks/batch_integration/workflows/run/main.nf +++ b/src/tasks/batch_integration/workflows/run/main.nf @@ -117,7 +117,7 @@ workflow run_wf { def pref = config.functionality.info.preferred_normalization // if the preferred normalisation is none at all, // we can pass whichever dataset we want - (norm == "log_cpm" && pref == "counts") || norm == pref + (norm == "log_cp10k" && pref == "counts") || norm == pref }, // define a new 'id' by appending the method name to the dataset id diff --git a/src/tasks/denoising/control_methods/no_denoising/config.vsh.yaml b/src/tasks/denoising/control_methods/no_denoising/config.vsh.yaml index f5267b9a22..f03199ab17 100644 --- a/src/tasks/denoising/control_methods/no_denoising/config.vsh.yaml +++ b/src/tasks/denoising/control_methods/no_denoising/config.vsh.yaml @@ -7,7 +7,7 @@ functionality: description: "This method serves as a negative control, where the denoised data is a copy of the unaltered training data. This represents the scoring threshold if denoising was not performed on the data." v1: path: openproblems/tasks/denoising/methods/baseline.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 variants: no_denoising: preferred_normalization: counts diff --git a/src/tasks/denoising/control_methods/perfect_denoising/config.vsh.yaml b/src/tasks/denoising/control_methods/perfect_denoising/config.vsh.yaml index b4d7f84cfe..27fcfa6953 100644 --- a/src/tasks/denoising/control_methods/perfect_denoising/config.vsh.yaml +++ b/src/tasks/denoising/control_methods/perfect_denoising/config.vsh.yaml @@ -7,7 +7,7 @@ functionality: description: "This method serves as a positive control, where the test data is copied 1-to-1 to the denoised data. This makes it seem as if the data is perfectly denoised as it will be compared to the test data in the metrics." 
v1: path: openproblems/tasks/denoising/methods/baseline.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 variants: perfect_denoising: preferred_normalization: counts diff --git a/src/tasks/denoising/methods/alra/config.vsh.yaml b/src/tasks/denoising/methods/alra/config.vsh.yaml index 96bf990a7d..82398c806d 100644 --- a/src/tasks/denoising/methods/alra/config.vsh.yaml +++ b/src/tasks/denoising/methods/alra/config.vsh.yaml @@ -18,7 +18,7 @@ functionality: documentation_url: https://github.com/KlugerLab/ALRA/blob/master/README.md v1: path: openproblems/tasks/denoising/methods/alra.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 variants: alra: preferred_normalization: counts diff --git a/src/tasks/denoising/methods/dca/config.vsh.yaml b/src/tasks/denoising/methods/dca/config.vsh.yaml index 125ee0e4a1..29c7b244ef 100644 --- a/src/tasks/denoising/methods/dca/config.vsh.yaml +++ b/src/tasks/denoising/methods/dca/config.vsh.yaml @@ -14,7 +14,7 @@ functionality: repository_url: "https://github.com/theislab/dca" v1: path: openproblems/tasks/denoising/methods/dca.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 variants: dca: preferred_normalization: counts diff --git a/src/tasks/denoising/methods/knn_smoothing/config.vsh.yaml b/src/tasks/denoising/methods/knn_smoothing/config.vsh.yaml index 92f35e3240..b573412828 100644 --- a/src/tasks/denoising/methods/knn_smoothing/config.vsh.yaml +++ b/src/tasks/denoising/methods/knn_smoothing/config.vsh.yaml @@ -20,7 +20,7 @@ functionality: repository_url: "https://github.com/yanailab/knn-smoothing" v1: path: openproblems/tasks/denoising/methods/knn_smoothing.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 variants: knn_smoothing: preferred_normalization: counts diff --git a/src/tasks/denoising/methods/magic/config.vsh.yaml b/src/tasks/denoising/methods/magic/config.vsh.yaml index 48c6044fef..d3d7122c1a 100644 --- a/src/tasks/denoising/methods/magic/config.vsh.yaml +++ b/src/tasks/denoising/methods/magic/config.vsh.yaml @@ -3,7 +3,7 @@ functionality: name: "magic" info: label: MAGIC - summary: "MAGIC imputes and denoises scRNA-seq data using Euclidean distances and a Gaussian kernel to calculate the affinity matrix, followed by a Markov process and multiplication with the normalised data to obtain imputed values." + summary: "MAGIC imputes and denoises scRNA-seq data that is noisy or dropout-prone." description: "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for imputation and denoising of noisy or dropout-prone single cell RNA-sequencing data. 
Given a normalised scRNA-seq expression matrix, it first calculates @@ -20,7 +20,7 @@ functionality: repository_url: "https://github.com/KrishnaswamyLab/MAGIC" v1: path: openproblems/tasks/denoising/methods/magic.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 variants: magic: magic_approx: diff --git a/src/tasks/denoising/metrics/mse/config.vsh.yaml b/src/tasks/denoising/metrics/mse/config.vsh.yaml index 89dc75d285..9013183fe4 100644 --- a/src/tasks/denoising/metrics/mse/config.vsh.yaml +++ b/src/tasks/denoising/metrics/mse/config.vsh.yaml @@ -10,10 +10,10 @@ functionality: reference: batson2019molecular v1: path: openproblems/tasks/denoising/metrics/mse.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 maximize: false min: 0 - max: +inf + max: "+.inf" resources: - type: python_script path: script.py diff --git a/src/tasks/denoising/metrics/poisson/config.vsh.yaml b/src/tasks/denoising/metrics/poisson/config.vsh.yaml index 1ef35f9d76..367570e8de 100644 --- a/src/tasks/denoising/metrics/poisson/config.vsh.yaml +++ b/src/tasks/denoising/metrics/poisson/config.vsh.yaml @@ -2,7 +2,6 @@ __merge__: ../../api/comp_metric.yaml functionality: name: "poisson" info: - reference: "batson2019molecular" metrics: - name: poisson label: Poisson Loss @@ -12,10 +11,10 @@ functionality: reference: batson2019molecular v1: path: openproblems/tasks/denoising/metrics/poisson.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 maximize: false min: 0 - max: +inf + max: "+.inf" resources: - type: python_script path: script.py diff --git a/src/tasks/denoising/workflows/run/main.nf b/src/tasks/denoising/workflows/run/main.nf index 4b98ec7698..ed7585aa82 100644 --- a/src/tasks/denoising/workflows/run/main.nf +++ b/src/tasks/denoising/workflows/run/main.nf @@ -72,7 +72,7 @@ workflow run_wf { def pref = config.functionality.info.preferred_normalization // if the preferred normalisation is none at all, // we can pass whichever dataset we want - (norm == "log_cpm" && pref == "counts") || norm == pref + (norm == "log_cp10k" && pref == "counts") || norm == pref }, // define a new 'id' by appending the method name to the dataset id diff --git a/src/tasks/denoising/workflows/run/run_test.sh b/src/tasks/denoising/workflows/run/run_test.sh index e671b93965..f6f0e8884c 100755 --- a/src/tasks/denoising/workflows/run/run_test.sh +++ b/src/tasks/denoising/workflows/run/run_test.sh @@ -22,7 +22,7 @@ nextflow \ -c src/wf_utils/labels_ci.config \ --id pancreas \ --dataset_id pancreas \ - --normalization_id log_cpm \ + --normalization_id log_cp10k \ --input_train $DATASET_DIR/train.h5ad \ --input_test $DATASET_DIR/test.h5ad \ --output scores.tsv \ diff --git a/src/tasks/denoising/workflows/run/run_test_on_tower.sh b/src/tasks/denoising/workflows/run/run_test_on_tower.sh index 5634670594..912cd376dc 100644 --- a/src/tasks/denoising/workflows/run/run_test_on_tower.sh +++ b/src/tasks/denoising/workflows/run/run_test_on_tower.sh @@ -8,7 +8,7 @@ id: pancreas_subsample input_train: s3://openproblems-data/$DATASET_DIR/train.h5ad input_test: s3://openproblems-data/$DATASET_DIR/test.h5ad dataset_id: pancreas -normalization_id: log_cpm +normalization_id: log_cp10k output: scores.tsv publish_dir: s3://openproblems-nextflow/output_test/v2/denoising HERE diff --git a/src/tasks/dimensionality_reduction/control_methods/random_features/config.vsh.yaml 
b/src/tasks/dimensionality_reduction/control_methods/random_features/config.vsh.yaml
index 9cbb060c57..6fe1089de7 100644
--- a/src/tasks/dimensionality_reduction/control_methods/random_features/config.vsh.yaml
+++ b/src/tasks/dimensionality_reduction/control_methods/random_features/config.vsh.yaml
@@ -7,7 +7,7 @@ functionality:
     description: "This method serves as a negative control, where the data is randomly embedded into a two-dimensional space, with no attempt to preserve the original structure."
     v1:
       path: openproblems/tasks/dimensionality_reduction/methods/baseline.py
-      commit: 14d70b330cae09527a6d4c4e552db240601e31cf
+      commit: 80b37e7a6aa27df4436f400397564c01276817e0
     preferred_normalization: counts
     variants:
       random_features:
diff --git a/src/tasks/dimensionality_reduction/control_methods/spectral_features/config.vsh.yaml b/src/tasks/dimensionality_reduction/control_methods/spectral_features/config.vsh.yaml
new file mode 100644
index 0000000000..ae926ec5d0
--- /dev/null
+++ b/src/tasks/dimensionality_reduction/control_methods/spectral_features/config.vsh.yaml
@@ -0,0 +1,41 @@
+__merge__: ../../api/comp_control_method.yaml
+functionality:
+  name: "spectral_features"
+  info:
+    label: Spectral Features
+    summary: "Positive control using 1000-dimensional diffusion maps as an embedding."
+    description: "This serves as a positive control since it uses 1000-dimensional diffusion maps as an embedding"
+    v1:
+      path: openproblems/tasks/dimensionality_reduction/methods/baseline.py
+      commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32
+    preferred_normalization: log_cp10k
+    variants:
+      spectral_features:
+  arguments:
+    - name: "--n_comps"
+      type: integer
+      default: 1000
+      description: "Number of components to use for the embedding."
+    - name: t
+      type: integer
+      default: 1
+      description: "Number to power the eigenvalues by."
+    - name: n_retries
+      type: integer
+      default: 1
+      description: "Number of times to retry if the embedding fails, each time adding noise."
+  resources:
+    - type: python_script
+      path: /src/tasks/dimensionality_reduction/methods/diffusion_map/script.py
+platforms:
+  - type: docker
+    image: ghcr.io/openproblems-bio/base_python:1.0.1
+    setup:
+      - type: python
+        pypi:
+          - umap-learn
+          - scipy
+          - numpy
+  - type: nextflow
+    directives:
+      label: [ highmem, highcpu ]
diff --git a/src/tasks/dimensionality_reduction/control_methods/true_features/config.vsh.yaml b/src/tasks/dimensionality_reduction/control_methods/true_features/config.vsh.yaml
index 37fb6bac0e..74d7f248e5 100644
--- a/src/tasks/dimensionality_reduction/control_methods/true_features/config.vsh.yaml
+++ b/src/tasks/dimensionality_reduction/control_methods/true_features/config.vsh.yaml
@@ -7,35 +7,16 @@ functionality:
     description: "This serves as a positive control since the original high-dimensional data is retained as is, without any loss of information"
     v1:
       path: openproblems/tasks/dimensionality_reduction/methods/baseline.py
-      commit: 4a0ee9b3731ff10d8cd2e584726a61b502aef613
-    preferred_normalization: counts
+      commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32
+    preferred_normalization: log_cp10k
     variants:
       true_features:
-      true_features_log_cpm:
-        preferred_normalization: log_cpm
-        use_normalized_layer: true
-      true_features_log_cpm_hvg:
-        preferred_normalization: log_cpm
-        use_normalized_layer: true
-        n_hvg: 1000
-  arguments:
-    - name: "--use_normalized_layer"
-      type: boolean
-      default: false
-      description: Whether to work with the raw counts or the normalized counts.
-    - name: "--n_hvg"
-      type: integer
-      description: Number of highly variable genes to subset to. If not specified, the input matrix will not be subset.
-      default: 1000
   resources:
     - type: python_script
      path: script.py
 platforms:
   - type: docker
     image: ghcr.io/openproblems-bio/base_python:1.0.1
-    setup:
-      - type: python
-        packages: scanpy
   - type: nextflow
     directives:
       label: [ highmem, highcpu ]
diff --git a/src/tasks/dimensionality_reduction/control_methods/true_features/script.py b/src/tasks/dimensionality_reduction/control_methods/true_features/script.py
index aa8469051c..1a58cd4984 100644
--- a/src/tasks/dimensionality_reduction/control_methods/true_features/script.py
+++ b/src/tasks/dimensionality_reduction/control_methods/true_features/script.py
@@ -4,8 +4,6 @@
 par = {
     "input": "resources_test/dimensionality_reduction/pancreas/test.h5ad",
     "output": "reduced.h5ad",
-    "n_hvg": 100,
-    "use_normalized_layer": False
 }
 meta = {
     "functionality_name": "true_features",
@@ -16,15 +14,7 @@
 input = ad.read_h5ad(par["input"])
 
 print("Create high dimensionally embedding with all features", flush=True)
-if par["use_normalized_layer"]:
-    X_emb = input.layers["counts"].toarray()
-else:
-    X_emb = input.layers["normalized"].toarray()
-
-if par["n_hvg"]:
-    print(f"Select top {par['n_hvg']} high variable genes", flush=True)
-    idx = input.var["hvg_score"].to_numpy().argsort()[::-1][:par["n_hvg"]]
-    X_emb = X_emb[:, idx]
+X_emb = input.layers["normalized"].toarray()
 
 print("Create output AnnData", flush=True)
 output = ad.AnnData(
diff --git a/src/tasks/dimensionality_reduction/methods/densmap/config.vsh.yaml b/src/tasks/dimensionality_reduction/methods/densmap/config.vsh.yaml
index cfb1ccd926..626110cd9a 100644
--- a/src/tasks/dimensionality_reduction/methods/densmap/config.vsh.yaml
+++ b/src/tasks/dimensionality_reduction/methods/densmap/config.vsh.yaml
@@ -10,15 +10,15 @@ functionality:
     documentation_url: https://github.com/lmcinnes/umap#readme
     v1:
       path: openproblems/tasks/dimensionality_reduction/methods/umap.py
-      commit: 14d70b330cae09527a6d4c4e552db240601e31cf
-    preferred_normalization: log_cpm
+      commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32
+    preferred_normalization: log_cp10k
     variants:
-      densmap_logCPM:
-      densmap_pca_logCPM:
+      densmap_logCP10k:
+      densmap_pca_logCP10k:
         n_pca_dims: 50
-      densmap_logCPM_1kHVG:
+      densmap_logCP10k_1kHVG:
         n_hvg: 1000
-      densmap_pca_logCPM_1kHVG:
+      densmap_pca_logCP10k_1kHVG:
         n_pca_dims: 50
         n_hvg: 1000
   arguments:
diff --git a/src/tasks/dimensionality_reduction/methods/diffusion_map/config.vsh.yaml b/src/tasks/dimensionality_reduction/methods/diffusion_map/config.vsh.yaml
new file mode 100644
index 0000000000..643a7b8bed
--- /dev/null
+++ b/src/tasks/dimensionality_reduction/methods/diffusion_map/config.vsh.yaml
@@ -0,0 +1,44 @@
+__merge__: ../../api/comp_method.yaml
+functionality:
+  name: "diffusion_map"
+  info:
+    label: Diffusion maps
+    summary: "Embeds cells using a diffusion map computed on the nearest-neighbor graph of the data."
+    description: "Diffusion maps compute an embedding from the top eigenvectors of a graph Laplacian built on the UMAP connectivity graph of the normalized data"
+    reference: coifman2006diffusion
+    documentation_url: https://github.com/openproblems-bio/openproblems
+    repository_url: https://github.com/openproblems-bio/openproblems
+    v1:
+      path: openproblems/tasks/dimensionality_reduction/methods/diffusion_map.py
+      commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32
+    preferred_normalization: log_cp10k
+    variants:
+      diffusion_map:
+  arguments:
+    - name: "--n_comps"
+      type: integer
+      default: 2
+      description: "Number of components to use for the embedding."
+    - name: t
+      type: integer
+      default: 1
+      description: "Number to power the eigenvalues by."
+    - name: n_retries
+      type: integer
+      default: 1
+      description: "Number of times to retry if the embedding fails, each time adding noise."
+  resources:
+    - type: python_script
+      path: script.py
+platforms:
+  - type: docker
+    image: ghcr.io/openproblems-bio/base_python:1.0.1
+    setup:
+      - type: python
+        pypi:
+          - umap-learn
+          - scipy
+          - numpy
+  - type: nextflow
+    directives:
+      label: [ highmem, highcpu ]
diff --git a/src/tasks/dimensionality_reduction/methods/diffusion_map/script.py b/src/tasks/dimensionality_reduction/methods/diffusion_map/script.py
new file mode 100644
index 0000000000..cf8633120c
--- /dev/null
+++ b/src/tasks/dimensionality_reduction/methods/diffusion_map/script.py
@@ -0,0 +1,77 @@
+import anndata as ad
+import umap
+
+## VIASH START
+par = {
+    "input": "resources_test/dimensionality_reduction/pancreas/test.h5ad",
+    "output": "reduced.h5ad",
+    "n_comps": 2,
+}
+meta = {
+    "functionality_name": "foo",
+}
+## VIASH END
+
+def diffusion_map(graph, n_comps, t, n_retries):
+    import numpy as np
+    import scipy.sparse.linalg
+
+    diag_data = np.asarray(graph.sum(axis=0))
+    identity = scipy.sparse.identity(graph.shape[0], dtype=np.float64)
+    diag = scipy.sparse.spdiags(
+        1.0 / np.sqrt(diag_data), 0, graph.shape[0], graph.shape[0]
+    )
+    laplacian = identity - diag * graph * diag
+    num_lanczos_vectors = max(2 * n_comps + 1, int(np.sqrt(graph.shape[0])))
+    try:
+        eigenvalues, eigenvectors = scipy.sparse.linalg.eigsh(
+            laplacian,
+            n_comps,
+            which="SM",
+            ncv=num_lanczos_vectors,
+            tol=1e-4,
+            v0=np.ones(laplacian.shape[0]),
+            maxiter=graph.shape[0] * 5,
+        )
+        return (eigenvalues**t) * eigenvectors
+    except scipy.sparse.linalg.ArpackNoConvergence:
+        if n_retries > 0:
+            # add some noise and try again
+            graph_rand = graph.copy().tocoo()
+            graph_rand.row = np.random.choice(
+                graph_rand.shape[0], len(graph_rand.row), replace=True
+            )
+            graph_rand.data *= 0.01
+            return diffusion_map(
+                graph + graph_rand, n_comps, t, n_retries=n_retries - 1
+            )
+        else:
+            raise
+
+print("Load input data", flush=True)
+input = ad.read_h5ad(par["input"])
+
+print("Compute diffusion map embedding", flush=True)
+
+n_comps = min(par["n_comps"], min(input.shape) - 2)
+
+graph = umap.UMAP(transform_mode="graph").fit_transform(input.layers["normalized"])
+
+X_emb = diffusion_map(graph, n_comps, t=par["t"], n_retries=par["n_retries"])
+
+
+print("Create output AnnData", flush=True)
+output = ad.AnnData(
+    obs=input.obs[[]],
+    obsm={
+        "X_emb": X_emb
+    },
+    uns={
+        "dataset_id": input.uns["dataset_id"],
+        "normalization_id": input.uns["normalization_id"],
+        "method_id": meta["functionality_name"]
+    }
+)
+
+print("Write output to file", flush=True)
+output.write_h5ad(par["output"], compression="gzip")
\ No newline at end of file
diff --git
a/src/tasks/dimensionality_reduction/methods/ivis/config.vsh.yaml b/src/tasks/dimensionality_reduction/methods/ivis/config.vsh.yaml index 4d57c4df9d..c22d2d1fd6 100644 --- a/src/tasks/dimensionality_reduction/methods/ivis/config.vsh.yaml +++ b/src/tasks/dimensionality_reduction/methods/ivis/config.vsh.yaml @@ -17,8 +17,8 @@ functionality: documentation_url: "https://github.com/beringresearch/ivis#readme" v1: path: openproblems/tasks/dimensionality_reduction/methods/ivis.py - commit: 9ebb777b3b76337e731a3b99f4bf39462a15c4cc - preferred_normalization: log_cpm + commit: 93d2161a08da3edf249abedff5111fb5ce527552 + preferred_normalization: log_cp10k variants: ivis_logCPM_1kHVG: arguments: diff --git a/src/tasks/dimensionality_reduction/methods/neuralee/config.vsh.yaml b/src/tasks/dimensionality_reduction/methods/neuralee/config.vsh.yaml index 6911b450a2..34e13c8c41 100644 --- a/src/tasks/dimensionality_reduction/methods/neuralee/config.vsh.yaml +++ b/src/tasks/dimensionality_reduction/methods/neuralee/config.vsh.yaml @@ -18,13 +18,13 @@ functionality: documentation_url: "https://github.com/HiBearME/NeuralEE#readme" v1: path: openproblems/tasks/dimensionality_reduction/methods/neuralee.py - commit: 14d70b330cae09527a6d4c4e552db240601e31cf - preferred_normalization: log_cpm + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + preferred_normalization: log_cp10k variants: neuralee_default: normalize: true n_hvg: 500 - neuralee_logCPM_1kHVG: + neuralee_logCP10k_1kHVG: normalize: false n_hvg: 1000 arguments: diff --git a/src/tasks/dimensionality_reduction/methods/pca/config.vsh.yaml b/src/tasks/dimensionality_reduction/methods/pca/config.vsh.yaml index 7ae19d13e9..5ca15443c4 100644 --- a/src/tasks/dimensionality_reduction/methods/pca/config.vsh.yaml +++ b/src/tasks/dimensionality_reduction/methods/pca/config.vsh.yaml @@ -16,11 +16,11 @@ functionality: documentation_url: "https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html" v1: path: openproblems/tasks/dimensionality_reduction/methods/pca.py - commit: 14d70b330cae09527a6d4c4e552db240601e31cf - preferred_normalization: log_cpm + commit: 154ccb9fd99113f3d28d9c3f139194539a0290f9 + preferred_normalization: log_cp10k variants: - pca_logCPM: - pca_logCPM_1kHVG: + pca_logCP10k: + pca_logCP10k_1kHVG: n_hvg: 1000 arguments: - name: "--n_hvg" diff --git a/src/tasks/dimensionality_reduction/methods/phate/config.vsh.yaml b/src/tasks/dimensionality_reduction/methods/phate/config.vsh.yaml index d69b8cc6f2..57b0e0eeac 100644 --- a/src/tasks/dimensionality_reduction/methods/phate/config.vsh.yaml +++ b/src/tasks/dimensionality_reduction/methods/phate/config.vsh.yaml @@ -18,17 +18,17 @@ functionality: documentation_url: "https://github.com/KrishnaswamyLab/PHATE#readme" v1: path: openproblems/tasks/dimensionality_reduction/methods/phate.py - commit: 14d70b330cae09527a6d4c4e552db240601e31cf - preferred_normalization: sqrt_cpm + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + preferred_normalization: sqrt_cp10k variants: phate_default: phate_sqrt: gamma: 0 - phate_logCPM: - preferred_normalization: log_cpm - phate_logCPM_1kHVG: + phate_logCP10k: + preferred_normalization: log_cp10k + phate_logCP10k_1kHVG: n_hvg: 1000 - preferred_normalization: log_cpm + preferred_normalization: log_cp10k arguments: - name: '--n_pca_dims' type: integer diff --git a/src/tasks/dimensionality_reduction/methods/tsne/config.vsh.yaml b/src/tasks/dimensionality_reduction/methods/tsne/config.vsh.yaml index 0da62a6a83..1b3e9ca9f4 100644 --- 
a/src/tasks/dimensionality_reduction/methods/tsne/config.vsh.yaml +++ b/src/tasks/dimensionality_reduction/methods/tsne/config.vsh.yaml @@ -16,11 +16,11 @@ functionality: documentation_url: "https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE" v1: path: openproblems/tasks/dimensionality_reduction/methods/tsne.py - commit: 14d70b330cae09527a6d4c4e552db240601e31cf - preferred_normalization: log_cpm + commit: 154ccb9fd99113f3d28d9c3f139194539a0290f9 + preferred_normalization: log_cp10k variants: - tsne_logCPM: - tsne_logCPM_1kHVG: + tsne_logCP10k: + tsne_logCP10k_1kHVG: n_hvg: 1000 arguments: - name: "--n_hvg" diff --git a/src/tasks/dimensionality_reduction/methods/umap/config.vsh.yaml b/src/tasks/dimensionality_reduction/methods/umap/config.vsh.yaml index 1aff2d0c2c..ddced67815 100644 --- a/src/tasks/dimensionality_reduction/methods/umap/config.vsh.yaml +++ b/src/tasks/dimensionality_reduction/methods/umap/config.vsh.yaml @@ -16,14 +16,14 @@ functionality: v1: path: openproblems/tasks/dimensionality_reduction/methods/umap.py commit: 14d70b330cae09527a6d4c4e552db240601e31cf - preferred_normalization: log_cpm + preferred_normalization: log_cp10k variants: - umap_logCPM: - umap_pca_logCPM: + umap_logCP10k: + umap_pca_logCP10k: n_pca_dims: 50 - umap_logCPM_1kHVG: + umap_logCP10k_1kHVG: n_hvg: 1000 - umap_pca_logCPM_1kHVG: + umap_pca_logCP10k_1kHVG: n_pca_dims: 50 n_hvg: 1000 arguments: diff --git a/src/tasks/dimensionality_reduction/metrics/coranking/config.vsh.yaml b/src/tasks/dimensionality_reduction/metrics/coranking/config.vsh.yaml index 552b50fd04..a4cc208ba3 100644 --- a/src/tasks/dimensionality_reduction/metrics/coranking/config.vsh.yaml +++ b/src/tasks/dimensionality_reduction/metrics/coranking/config.vsh.yaml @@ -17,7 +17,7 @@ functionality: maximize: true v1: path: openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py - commit: 14d70b330cae09527a6d4c4e552db240601e31cf + commit: e3be930c6d4bbd656ab1e656badb52bb50e6cdd6 note: | The original v1 implementations consisted of a lot of helper functions which were derived from the pyDRMetrics package. This version uses the coRanking package @@ -38,7 +38,7 @@ functionality: maximize: true v1: path: openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py - commit: 14d70b330cae09527a6d4c4e552db240601e31cf + commit: e3be930c6d4bbd656ab1e656badb52bb50e6cdd6 note: | The original v1 implementations consisted of a lot of helper functions which were derived from the pyDRMetrics package. This version uses the coRanking package @@ -59,7 +59,7 @@ functionality: maximize: true v1: path: openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py - commit: 14d70b330cae09527a6d4c4e552db240601e31cf + commit: e3be930c6d4bbd656ab1e656badb52bb50e6cdd6 note: | The original v1 implementations consisted of a lot of helper functions which were derived from the pyDRMetrics package. This version uses the coRanking package @@ -80,7 +80,7 @@ functionality: maximize: true v1: path: openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py - commit: 14d70b330cae09527a6d4c4e552db240601e31cf + commit: e3be930c6d4bbd656ab1e656badb52bb50e6cdd6 note: | The original v1 implementations consisted of a lot of helper functions which were derived from the pyDRMetrics package. 
This version uses the coRanking package @@ -101,7 +101,7 @@ functionality: maximize: true v1: path: openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py - commit: 14d70b330cae09527a6d4c4e552db240601e31cf + commit: e3be930c6d4bbd656ab1e656badb52bb50e6cdd6 note: | The original v1 implementations consisted of a lot of helper functions which were derived from the pyDRMetrics package. This version uses the coRanking package @@ -122,7 +122,7 @@ functionality: maximize: true v1: path: openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py - commit: 14d70b330cae09527a6d4c4e552db240601e31cf + commit: e3be930c6d4bbd656ab1e656badb52bb50e6cdd6 note: | The original v1 implementations consisted of a lot of helper functions which were derived from the pyDRMetrics package. This version uses the coRanking package @@ -143,7 +143,7 @@ functionality: maximize: true v1: path: openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py - commit: 14d70b330cae09527a6d4c4e552db240601e31cf + commit: e3be930c6d4bbd656ab1e656badb52bb50e6cdd6 note: | The original v1 implementations consisted of a lot of helper functions which were derived from the pyDRMetrics package. This version uses the coRanking package diff --git a/src/tasks/dimensionality_reduction/metrics/coranking/library.bib b/src/tasks/dimensionality_reduction/metrics/coranking/library.bib deleted file mode 100644 index 5ecdb67e51..0000000000 --- a/src/tasks/dimensionality_reduction/metrics/coranking/library.bib +++ /dev/null @@ -1,62 +0,0 @@ - -@misc{lueks2011evaluate, - doi = {10.48550/ARXIV.1110.3917}, - url = {https://arxiv.org/abs/1110.3917}, - author = {Lueks, Wouter and Mokbel, Bassam and Biehl, Michael and Hammer, Barbara}, - keywords = {Machine Learning (cs.LG), Information Retrieval (cs.IR), FOS: Computer and information sciences, FOS: Computer and information sciences}, - title = {How to Evaluate Dimensionality Reduction? - Improving the Co-ranking Matrix}, - publisher = {arXiv}, - year = {2011}, - copyright = {arXiv.org perpetual, non-exclusive license} -} -@article{kraemer2018dimred, - doi = {10.32614/rj-2018-039}, - url = {https://doi.org/10.32614/rj-2018-039}, - year = {2018}, - publisher = {The R Foundation}, - volume = {10}, - number = {1}, - pages = {342}, - author = {Guido Kraemer and Markus Reichstein and Miguel, D. Mahecha}, - title = {{dimRed} and {coRanking} - Unifying Dimensionality Reduction in R}, - journal = {The R Journal} -} -@article{chen2009local, - doi = {10.1198/jasa.2009.0111}, - url = {https://doi.org/10.1198/jasa.2009.0111}, - year = {2009}, - month = mar, - publisher = {Informa {UK} Limited}, - volume = {104}, - number = {485}, - pages = {209--219}, - author = {Lisha Chen and Andreas Buja}, - title = {Local Multidimensional Scaling for Nonlinear Dimension Reduction, Graph Drawing, and Proximity Analysis}, - journal = {Journal of the American Statistical Association} -} -@article{lee2009quality, - doi = {10.1016/j.neucom.2008.12.017}, - url = {https://doi.org/10.1016/j.neucom.2008.12.017}, - year = {2009}, - month = mar, - publisher = {Elsevier {BV}}, - volume = {72}, - number = {7-9}, - pages = {1431--1443}, - author = {John A. 
Lee and Michel Verleysen}, - title = {Quality assessment of dimensionality reduction: Rank-based criteria}, - journal = {Neurocomputing} -} -@article{venna2006local, - doi = {10.1016/j.neunet.2006.05.014}, - url = {https://doi.org/10.1016/j.neunet.2006.05.014}, - year = {2006}, - month = jul, - publisher = {Elsevier {BV}}, - volume = {19}, - number = {6-7}, - pages = {889--899}, - author = {Jarkko Venna and Samuel Kaski}, - title = {Local multidimensional scaling}, - journal = {Neural Networks} -} \ No newline at end of file diff --git a/src/tasks/dimensionality_reduction/metrics/density_preservation/config.vsh.yaml b/src/tasks/dimensionality_reduction/metrics/density_preservation/config.vsh.yaml index 91d10dcf43..ed671faedd 100644 --- a/src/tasks/dimensionality_reduction/metrics/density_preservation/config.vsh.yaml +++ b/src/tasks/dimensionality_reduction/metrics/density_preservation/config.vsh.yaml @@ -15,7 +15,16 @@ functionality: maximize: true v1: path: openproblems/tasks/dimensionality_reduction/metrics/density.py - commit: 29803b95c88b4ec5921df2eec7111fd5d1a95daf + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + arguments: + - name: "--n_neighbors" + type: integer + default: 30 + description: "Number of neighbors to use for density estimation." + - name: "--seed" + type: integer + default: 42 + description: "Random seed." resources: - type: python_script path: script.py diff --git a/src/tasks/dimensionality_reduction/metrics/density_preservation/script.py b/src/tasks/dimensionality_reduction/metrics/density_preservation/script.py index 9cae4d1f12..9bf44397c2 100644 --- a/src/tasks/dimensionality_reduction/metrics/density_preservation/script.py +++ b/src/tasks/dimensionality_reduction/metrics/density_preservation/script.py @@ -11,6 +11,8 @@ "input_embedding": "resources_test/dimensionality_reduction/pancreas/reduced.h5ad", "input_solution": "resources_test/dimensionality_reduction/pancreas/test.h5ad", "output": "score.h5ad", + "n_neighbors": 30, + "seed": 42, } ## VIASH END @@ -84,27 +86,22 @@ def compute_density_preservation( return 0.0 print("Compute local radii in original data", flush=True) - _, ro, _ = UMAP( - n_neighbors=_K, - random_state=_SEED, - densmap=True, - output_dens=True - ).fit_transform(high_dim) + ro = _calculate_radii( + high_dim, + n_neighbors=n_neighbors, + random_state=random_state + ) print("Compute local radii of embedding", flush=True) re = _calculate_radii( X_emb, - n_neighbors=_K, - random_state=_SEED + n_neighbors=n_neighbors, + random_state=random_state ) print("Compute pearson correlation", flush=True) return pearsonr(ro, re)[0] -# number of neighbors -_K = 30 -# Fix seed -_SEED = 42 print("Load data", flush=True) input_solution = ad.read_h5ad(par["input_solution"]) @@ -116,8 +113,8 @@ def compute_density_preservation( density_preservation = compute_density_preservation( X_emb=X_emb, high_dim=high_dim, - n_neighbors=_K, - random_state=_SEED + n_neighbors=par["n_neighbors"], + random_state=par["seed"] ) print("Create output AnnData object", flush=True) diff --git a/src/tasks/dimensionality_reduction/metrics/distance_correlation/config.vsh.yaml b/src/tasks/dimensionality_reduction/metrics/distance_correlation/config.vsh.yaml new file mode 100644 index 0000000000..7e30f9efbe --- /dev/null +++ b/src/tasks/dimensionality_reduction/metrics/distance_correlation/config.vsh.yaml @@ -0,0 +1,49 @@ +__merge__: ../../api/comp_metric.yaml +functionality: + name: distance_correlation + info: + metrics: + - name: distance_correlation + label: Distance Correlation + 
summary: "Calculates the distance correlation by computing Spearman correlations between distances." + description: "Calculates the distance correlation by computing Spearman correlations between distances on the full (or processed) data matrix and the dimensionally-reduced matrix." + reference: kruskal1964mds + min: 0 + max: "+.inf" + maximize: false + v1: + path: openproblems/tasks/dimensionality_reduction/metrics/distance_correlation.py + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + note: This metric was ported but will probably be removed soon. + - name: distance_correlation_spectral + label: Distance Correlation Spectral + summary: "Spearman correlation between all pairwise diffusion distances in the original and dimension-reduced data." + description: "Spearman correlation between all pairwise diffusion distances in the original and dimension-reduced data." + reference: coifman2006diffusion + min: 0 + max: "+.inf" + maximize: false + v1: + path: openproblems/tasks/dimensionality_reduction/metrics/root_mean_square_error.py + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + note: This metric was ported but will probably be removed soon. + arguments: + - name: "--spectral" + type: boolean_true + description: Calculate the spectral root mean squared error. + resources: + - type: python_script + path: script.py +platforms: + - type: docker + image: ghcr.io/openproblems-bio/base_python:1.0.1 + setup: + - type: python + packages: + - umap-learn + - scikit-learn + - numpy + - scipy + - type: nextflow + directives: + label: [ midmem, midcpu ] diff --git a/src/tasks/dimensionality_reduction/metrics/rmse/script.py b/src/tasks/dimensionality_reduction/metrics/distance_correlation/script.py similarity index 70% rename from src/tasks/dimensionality_reduction/metrics/rmse/script.py rename to src/tasks/dimensionality_reduction/metrics/distance_correlation/script.py index 4b33fe02ce..d461f271b4 100644 --- a/src/tasks/dimensionality_reduction/metrics/rmse/script.py +++ b/src/tasks/dimensionality_reduction/metrics/distance_correlation/script.py @@ -1,7 +1,7 @@ import anndata as ad import numpy as np import sklearn.decomposition -import scipy.optimize +import scipy.stats import scipy.spatial from sklearn.metrics import pairwise_distances import umap @@ -15,13 +15,13 @@ } ## VIASH END -def _rmse(X, X_emb): +def _distance_correlation(X, X_emb): high_dimensional_distance_vector = scipy.spatial.distance.pdist(X) low_dimensional_distance_vector = scipy.spatial.distance.pdist(X_emb) - _, rmse = scipy.optimize.nnls( - low_dimensional_distance_vector[:, None], high_dimensional_distance_vector + corr = scipy.stats.spearmanr( + low_dimensional_distance_vector, high_dimensional_distance_vector ) - return rmse + return corr print("Load data", flush=True) input_solution = ad.read_h5ad(par["input_solution"]) @@ -31,17 +31,18 @@ def _rmse(X, X_emb): X_emb = input_embedding.obsm["X_emb"] print("Compute NNLS residual after SVD", flush=True) -n_svd = 200 +n_svd = 500 svd_emb = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(high_dim) -rmse = _rmse(svd_emb, X_emb) +dist_corr = _distance_correlation(svd_emb, X_emb) +#! Explicitly not changing it to use diffusion map method as this will have a positive effect on the diffusion map method for this specific metric. 
print("Compute NLSS residual after spectral embedding", flush=True) -n_comps = min(200, min(input_solution.shape) - 2) +n_comps = min(1000, min(input_solution.shape) - 2) umap_graph = umap.UMAP(transform_mode="graph").fit_transform(high_dim) spectral_emb = umap.spectral.spectral_layout( high_dim, umap_graph, n_comps, random_state=np.random.default_rng() ) -rmse_spectral = _rmse(spectral_emb, X_emb) +dist_corr_spectral = _distance_correlation(spectral_emb, X_emb) print("Create output AnnData object", flush=True) output = ad.AnnData( @@ -49,8 +50,8 @@ def _rmse(X, X_emb): "dataset_id": input_solution.uns["dataset_id"], "normalization_id": input_solution.uns["normalization_id"], "method_id": input_embedding.uns["method_id"], - "metric_ids": [ "rmse", "rmse_spectral" ], - "metric_values": [ rmse, rmse_spectral ] + "metric_ids": [ "distance correlation", "distance_correlation_spectral" ], + "metric_values": [ dist_corr, dist_corr_spectral ] } ) diff --git a/src/tasks/dimensionality_reduction/metrics/rmse/config.vsh.yaml b/src/tasks/dimensionality_reduction/metrics/rmse/config.vsh.yaml deleted file mode 100644 index 5874ffb3c1..0000000000 --- a/src/tasks/dimensionality_reduction/metrics/rmse/config.vsh.yaml +++ /dev/null @@ -1,45 +0,0 @@ -__merge__: ../../api/comp_metric.yaml -functionality: - name: "rmse" - info: - metrics: - - name: rmse - label: RMSE - summary: "The residual after applying the Non-Negative Least Squares solver on the pairwise distances of an SVD." - description: "The residual after applying the Non-Negative Least Squares solver on the pairwise distances of an SVD." - reference: kruskal1964mds - min: 0 - max: "+.inf" - maximize: false - - name: rmse_spectral - label: RMSE Spectral - summary: "The residual after applying the Non-Negative Least Squares solver on the pairwise distances of a spectral embedding." - description: "The residual after applying the Non-Negative Least Squares solver on the pairwise distances of a spectral embedding." - reference: coifman2006diffusion - min: 0 - max: "+.inf" - maximize: false - v1: - path: openproblems/tasks/dimensionality_reduction/metrics/root_mean_square_error.py - commit: b353a462f6ea353e0fc43d0f9fcbbe621edc3a0b - note: This metric was ported but will probably be removed soon. - arguments: - - name: "--spectral" - type: boolean_true - description: Calculate the spectral root mean squared error. - resources: - - type: python_script - path: script.py -platforms: - - type: docker - image: ghcr.io/openproblems-bio/base_python:1.0.1 - setup: - - type: python - packages: - - umap-learn - - scikit-learn - - numpy - - scipy - - type: nextflow - directives: - label: [ midmem, midcpu ] diff --git a/src/tasks/dimensionality_reduction/metrics/trustworthiness/config.vsh.yaml b/src/tasks/dimensionality_reduction/metrics/trustworthiness/config.vsh.yaml index ce65fc8b60..b56012ae74 100644 --- a/src/tasks/dimensionality_reduction/metrics/trustworthiness/config.vsh.yaml +++ b/src/tasks/dimensionality_reduction/metrics/trustworthiness/config.vsh.yaml @@ -13,7 +13,7 @@ functionality: maximize: true v1: path: openproblems/tasks/dimensionality_reduction/metrics/trustworthiness.py - commit: c2470ce02e6f196267cec1c554ba7ae389c0956a + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 note: This metric is already included in the 'coranking' component and can be removed. 
resources: - type: python_script diff --git a/src/tasks/dimensionality_reduction/resources_test_scripts/pancreas.sh b/src/tasks/dimensionality_reduction/resources_test_scripts/pancreas.sh index c208311399..165181ca72 100755 --- a/src/tasks/dimensionality_reduction/resources_test_scripts/pancreas.sh +++ b/src/tasks/dimensionality_reduction/resources_test_scripts/pancreas.sh @@ -8,7 +8,7 @@ REPO_ROOT=$(git rev-parse --show-toplevel) # ensure that the command below is run from the root of the repository cd "$REPO_ROOT" -RAW_DATA=resources_test/common/pancreas/dataset.h5ad +RAW_DATA=resources_test/common/pancreas/cp10k_dataset.h5ad DATASET_DIR=resources_test/dimensionality_reduction/pancreas if [ ! -f $RAW_DATA ]; then @@ -46,7 +46,7 @@ nextflow \ -profile docker \ --id pancreas \ --dataset_id pancreas \ - --normalization_id log_cpm \ + --normalization_id log_cp10k \ --input $DATASET_DIR/dataset.h5ad \ --input_solution $DATASET_DIR/solution.h5ad \ --output scores.tsv \ diff --git a/src/tasks/dimensionality_reduction/workflows/run/main.nf b/src/tasks/dimensionality_reduction/workflows/run/main.nf index 9dd5b10231..6d0913191f 100644 --- a/src/tasks/dimensionality_reduction/workflows/run/main.nf +++ b/src/tasks/dimensionality_reduction/workflows/run/main.nf @@ -80,7 +80,7 @@ workflow run_wf { def pref = config.functionality.info.preferred_normalization // if the preferred normalisation is none at all, // we can pass whichever dataset we want - (norm == "log_cpm" && pref == "counts") || norm == pref + (norm == "log_cp10k" && pref == "counts") || norm == pref }, // define a new 'id' by appending the method name to the dataset id diff --git a/src/tasks/dimensionality_reduction/workflows/run/run_test.sh b/src/tasks/dimensionality_reduction/workflows/run/run_test.sh index 3aeeb58baa..299f8accf8 100755 --- a/src/tasks/dimensionality_reduction/workflows/run/run_test.sh +++ b/src/tasks/dimensionality_reduction/workflows/run/run_test.sh @@ -21,7 +21,7 @@ nextflow \ -resume \ --id pancreas \ --dataset_id pancreas \ - --normalization_id log_cpm \ + --normalization_id log_cp10k \ --input $DATASET_DIR/dataset.h5ad \ --input_solution $DATASET_DIR/solution.h5ad \ --output scores.tsv \ diff --git a/src/tasks/dimensionality_reduction/workflows/run/run_test_on_tower.sh b/src/tasks/dimensionality_reduction/workflows/run/run_test_on_tower.sh index befcde4d49..f2ff994080 100644 --- a/src/tasks/dimensionality_reduction/workflows/run/run_test_on_tower.sh +++ b/src/tasks/dimensionality_reduction/workflows/run/run_test_on_tower.sh @@ -8,7 +8,7 @@ id: pancreas_subsample input: s3://openproblems-data/$DATASET_DIR/dataset.h5ad input_solution: s3://openproblems-data/$DATASET_DIR/solution.h5ad dataset_id: pancreas -normalization_id: log_cpm +normalization_id: log_cp10k output: scores.tsv publish_dir: s3://openproblems-nextflow/output_test/v2/dimensionality_reduction HERE diff --git a/src/tasks/label_projection/control_methods/majority_vote/config.vsh.yaml b/src/tasks/label_projection/control_methods/majority_vote/config.vsh.yaml index 6cd01534c4..53142aaf9e 100644 --- a/src/tasks/label_projection/control_methods/majority_vote/config.vsh.yaml +++ b/src/tasks/label_projection/control_methods/majority_vote/config.vsh.yaml @@ -7,7 +7,7 @@ functionality: description: "A control-type method that predicts all cells to belong to the most abundant cell type in the dataset" v1: path: openproblems/tasks/label_projection/methods/baseline.py - commit: b460ecb183328c857cbbf653488f522a4034a61c + commit: 
b3456fd73c04c28516f6df34c57e6e3e8b0dab32 variants: majority_vote: preferred_normalization: counts diff --git a/src/tasks/label_projection/control_methods/random_labels/config.vsh.yaml b/src/tasks/label_projection/control_methods/random_labels/config.vsh.yaml index 014ee5249d..dc95a42468 100644 --- a/src/tasks/label_projection/control_methods/random_labels/config.vsh.yaml +++ b/src/tasks/label_projection/control_methods/random_labels/config.vsh.yaml @@ -7,7 +7,7 @@ functionality: description: "A negative control, where the labels are randomly predicted without training the data." v1: path: openproblems/tasks/label_projection/methods/baseline.py - commit: b460ecb183328c857cbbf653488f522a4034a61c + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 preferred_normalization: counts variants: random_labels: diff --git a/src/tasks/label_projection/control_methods/true_labels/config.vsh.yaml b/src/tasks/label_projection/control_methods/true_labels/config.vsh.yaml index ef313a16ee..384c2cf92e 100644 --- a/src/tasks/label_projection/control_methods/true_labels/config.vsh.yaml +++ b/src/tasks/label_projection/control_methods/true_labels/config.vsh.yaml @@ -7,7 +7,7 @@ functionality: description: "A positive control, where the solution labels are copied 1 to 1 to the predicted data." v1: path: openproblems/tasks/label_projection/methods/baseline.py - commit: b460ecb183328c857cbbf653488f522a4034a61c + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 preferred_normalization: counts variants: true_labels: diff --git a/src/tasks/label_projection/methods/knn/config.vsh.yaml b/src/tasks/label_projection/methods/knn/config.vsh.yaml index 0841b7ebe4..12445bedd0 100644 --- a/src/tasks/label_projection/methods/knn/config.vsh.yaml +++ b/src/tasks/label_projection/methods/knn/config.vsh.yaml @@ -17,10 +17,10 @@ functionality: documentation_url: "https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html" v1: path: openproblems/tasks/label_projection/methods/knn_classifier.py - commit: c2470ce02e6f196267cec1c554ba7ae389c0956a - preferred_normalization: log_cpm + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + preferred_normalization: log_cp10k variants: - knn_classifier_log_cpm: + knn_classifier_log_cp10k: knn_classifier_scran: preferred_normalization: log_scran_pooling resources: diff --git a/src/tasks/label_projection/methods/logistic_regression/config.vsh.yaml b/src/tasks/label_projection/methods/logistic_regression/config.vsh.yaml index 8deac18a99..990b8cf368 100644 --- a/src/tasks/label_projection/methods/logistic_regression/config.vsh.yaml +++ b/src/tasks/label_projection/methods/logistic_regression/config.vsh.yaml @@ -14,10 +14,10 @@ functionality: documentation_url: "https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html" v1: path: openproblems/tasks/label_projection/methods/logistic_regression.py - commit: c2470ce02e6f196267cec1c554ba7ae389c0956a - preferred_normalization: log_cpm + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + preferred_normalization: log_cp10k variants: - logistic_regression_log_cpm: + logistic_regression_log_cp10k: logistic_regression_scran: preferred_normalization: log_scran_pooling resources: diff --git a/src/tasks/label_projection/methods/mlp/config.vsh.yaml b/src/tasks/label_projection/methods/mlp/config.vsh.yaml index 8ec1f9cbf0..8046a01e95 100644 --- a/src/tasks/label_projection/methods/mlp/config.vsh.yaml +++ b/src/tasks/label_projection/methods/mlp/config.vsh.yaml @@ -17,10 +17,10 @@ functionality: 
documentation_url: "https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html" v1: path: openproblems/tasks/label_projection/methods/mlp.py - commit: c2470ce02e6f196267cec1c554ba7ae389c0956a - preferred_normalization: log_cpm + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + preferred_normalization: log_cp10k variants: - mlp_log_cpm: + mlp_log_cp10k: mlp_scran: preferred_normalization: log_scran_pooling arguments: diff --git a/src/tasks/label_projection/methods/scanvi/config.vsh.yaml b/src/tasks/label_projection/methods/scanvi/config.vsh.yaml index f765b07c98..5cbc8fb3a4 100644 --- a/src/tasks/label_projection/methods/scanvi/config.vsh.yaml +++ b/src/tasks/label_projection/methods/scanvi/config.vsh.yaml @@ -17,8 +17,8 @@ functionality: documentation_url: https://scarches.readthedocs.io/en/latest/scanvi_surgery_pipeline.html v1: path: openproblems/tasks/label_projection/methods/scvi_tools.py - commit: 4bb8a7e04545a06c336d3d9364a1dd84fa2af1a4 - preferred_normalization: log_cpm + commit: e3be930c6d4bbd656ab1e656badb52bb50e6cdd6 + preferred_normalization: log_cp10k variants: scanvi_all_genes: scanvi_hvg: diff --git a/src/tasks/label_projection/methods/scanvi_scarches/config.vsh.yaml b/src/tasks/label_projection/methods/scanvi_scarches/config.vsh.yaml index 56662a542c..38df609144 100644 --- a/src/tasks/label_projection/methods/scanvi_scarches/config.vsh.yaml +++ b/src/tasks/label_projection/methods/scanvi_scarches/config.vsh.yaml @@ -17,8 +17,12 @@ functionality: documentation_url: https://docs.scvi-tools.org repository_url: https://github.com/scverse/scvi-tools preferred_normalization: counts + v1: + path: openproblems/tasks/label_projection/methods/scvi_tools.py + commit: e3be930c6d4bbd656ab1e656badb52bb50e6cdd6 variants: scanvi_scarches: + #! 
TODO: add other scanvi_scarches variants # Component-specific parameters (optional) arguments: diff --git a/src/tasks/label_projection/methods/seurat_transferdata/config.vsh.yaml b/src/tasks/label_projection/methods/seurat_transferdata/config.vsh.yaml index b30629f6b5..045819ba47 100644 --- a/src/tasks/label_projection/methods/seurat_transferdata/config.vsh.yaml +++ b/src/tasks/label_projection/methods/seurat_transferdata/config.vsh.yaml @@ -18,8 +18,8 @@ functionality: documentation_url: "https://satijalab.org/seurat/articles/integration_mapping.html" v1: path: openproblems/tasks/label_projection/methods/seurat.py - commit: 3f19f0e87a8bc8b59c7521ba01917580aff81bc8 - preferred_normalization: log_cpm + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 + preferred_normalization: log_cp10k variants: seurat: resources: diff --git a/src/tasks/label_projection/methods/xgboost/config.vsh.yaml b/src/tasks/label_projection/methods/xgboost/config.vsh.yaml index 2234967a79..c37e7611f9 100644 --- a/src/tasks/label_projection/methods/xgboost/config.vsh.yaml +++ b/src/tasks/label_projection/methods/xgboost/config.vsh.yaml @@ -14,10 +14,10 @@ functionality: documentation_url: "https://xgboost.readthedocs.io/en/stable/index.html" v1: path: openproblems/tasks/label_projection/methods/xgboost.py - commit: 123bb7b39c51c58e19ddf0fbbc1963c3dffde14c - preferred_normalization: log_cpm + commit: e3be930c6d4bbd656ab1e656badb52bb50e6cdd6 + preferred_normalization: log_cp10k variants: - xgboost_log_cpm: + xgboost_log_cp10k: xgboost_scran: preferred_normalization: log_scran_pooling resources: diff --git a/src/tasks/label_projection/metrics/accuracy/config.vsh.yaml b/src/tasks/label_projection/metrics/accuracy/config.vsh.yaml index 9414a5eaad..11674fde5c 100644 --- a/src/tasks/label_projection/metrics/accuracy/config.vsh.yaml +++ b/src/tasks/label_projection/metrics/accuracy/config.vsh.yaml @@ -13,7 +13,7 @@ functionality: reference: grandini2020metrics v1: path: openproblems/tasks/label_projection/metrics/accuracy.py - commit: fcd5b876e7d0667da73a2858bc27c40224e19f65 + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 resources: - type: python_script path: script.py diff --git a/src/tasks/label_projection/metrics/f1/config.vsh.yaml b/src/tasks/label_projection/metrics/f1/config.vsh.yaml index f78f4c8bba..ec6eece949 100644 --- a/src/tasks/label_projection/metrics/f1/config.vsh.yaml +++ b/src/tasks/label_projection/metrics/f1/config.vsh.yaml @@ -13,7 +13,7 @@ functionality: maximize: true v1: path: openproblems/tasks/label_projection/metrics/f1.py - commit: bb16ca05ae1ce20ce59bfa7a879641b9300df6b0 + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 - name: f1_macro label: F1 macro summary: "Unweighted mean of each label F1-score" @@ -24,7 +24,7 @@ functionality: maximize: true v1: path: openproblems/tasks/label_projection/metrics/f1.py - commit: bb16ca05ae1ce20ce59bfa7a879641b9300df6b0 + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 - name: f1_micro label: F1 micro summary: "Calculation of TP, FN and FP." 
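# [editor's note, not part of the patch] The three F1 variants defined in this config differ
# only in scikit-learn's `average` argument; a minimal sketch, assuming label vectors `y_true`
# and `y_pred` as produced by the prediction methods:
from sklearn.metrics import f1_score
f1_weighted = f1_score(y_true, y_pred, average="weighted")  # per-class F1, weighted by class frequency
f1_macro = f1_score(y_true, y_pred, average="macro")        # unweighted mean of per-class F1
f1_micro = f1_score(y_true, y_pred, average="micro")        # F1 from global TP, FP and FN counts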
@@ -35,7 +35,7 @@ functionality: maximize: true v1: path: openproblems/tasks/label_projection/metrics/f1.py - commit: bb16ca05ae1ce20ce59bfa7a879641b9300df6b0 + commit: b3456fd73c04c28516f6df34c57e6e3e8b0dab32 resources: - type: python_script path: script.py diff --git a/src/tasks/label_projection/resources_test_scripts/pancreas.sh b/src/tasks/label_projection/resources_test_scripts/pancreas.sh index d9780a4425..bb3b687ba1 100755 --- a/src/tasks/label_projection/resources_test_scripts/pancreas.sh +++ b/src/tasks/label_projection/resources_test_scripts/pancreas.sh @@ -9,7 +9,7 @@ REPO_ROOT=$(git rev-parse --show-toplevel) # ensure that the command below is run from the root of the repository cd "$REPO_ROOT" -RAW_DATA=resources_test/common/pancreas/dataset.h5ad +RAW_DATA=resources_test/common/pancreas/cp10k_dataset.h5ad DATASET_DIR=resources_test/label_projection/pancreas if [ ! -f $RAW_DATA ]; then @@ -49,7 +49,7 @@ nextflow \ -resume \ --id pancreas \ --dataset_id pancreas \ - --normalization_id log_cpm \ + --normalization_id log_cp10k \ --input_train $DATASET_DIR/train.h5ad \ --input_test $DATASET_DIR/test.h5ad \ --input_solution $DATASET_DIR/solution.h5ad \ diff --git a/src/tasks/label_projection/workflows/run/main.nf b/src/tasks/label_projection/workflows/run/main.nf index bd54498e0c..d6f5146440 100644 --- a/src/tasks/label_projection/workflows/run/main.nf +++ b/src/tasks/label_projection/workflows/run/main.nf @@ -85,7 +85,7 @@ workflow run_wf { def pref = config.functionality.info.preferred_normalization // if the preferred normalisation is none at all, // we can pass whichever dataset we want - (norm == "log_cpm" && pref == "counts") || norm == pref + (norm == "log_cp10k" && pref == "counts") || norm == pref }, // define a new 'id' by appending the method name to the dataset id diff --git a/src/tasks/label_projection/workflows/run/run_test.sh b/src/tasks/label_projection/workflows/run/run_test.sh index b31c9ae4ac..a909381666 100755 --- a/src/tasks/label_projection/workflows/run/run_test.sh +++ b/src/tasks/label_projection/workflows/run/run_test.sh @@ -21,7 +21,7 @@ nextflow \ -resume \ --id pancreas \ --dataset_id pancreas \ - --normalization_id log_cpm \ + --normalization_id log_cp10k \ --input_train $DATASET_DIR/train.h5ad \ --input_test $DATASET_DIR/test.h5ad \ --input_solution $DATASET_DIR/solution.h5ad \ diff --git a/src/tasks/label_projection/workflows/run/run_test_on_tower.sh b/src/tasks/label_projection/workflows/run/run_test_on_tower.sh index 27c7ee8e3d..cce0f3d89f 100644 --- a/src/tasks/label_projection/workflows/run/run_test_on_tower.sh +++ b/src/tasks/label_projection/workflows/run/run_test_on_tower.sh @@ -9,7 +9,7 @@ input_train: s3://openproblems-data/$DATASET_DIR/train.h5ad input_test: s3://openproblems-data/$DATASET_DIR/test.h5ad input_solution: s3://openproblems-data/$DATASET_DIR/solution.h5ad dataset_id: pancreas -normalization_id: log_cpm +normalization_id: log_cp10k output: scores.tsv publish_dir: s3://openproblems-nextflow/output_test/v2/label_projection HERE