Add 017 CADA and 019 ADAM Guides
mibaumgartner committed May 15, 2021
1 parent 12515f1 commit 42eeb02
Showing 14 changed files with 271 additions and 52 deletions.
Empty file added docs/results/nnDetectionV001.md
50 changes: 50 additions & 0 deletions nndet/utils/check.py
@@ -1,7 +1,57 @@
import functools
import os
import warnings

from nndet.io.paths import get_task
from nndet.utils.config import load_dataset_info


def env_guard(func):
    """
    Decorator to check nnDetection environment variables
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # we use print here because logging might not be initialized yet and
        # this is intended as a user warning.

        # det_data
        if os.environ.get("det_data", None) is None:
            raise RuntimeError(
                "'det_data' environment variable not set. "
                "Please refer to the installation instructions. "
            )

        # det_models
        if os.environ.get("det_models", None) is None:
            raise RuntimeError(
                "'det_models' environment variable not set. "
                "Please refer to the installation instructions. "
            )

        # OMP_NUM_THREADS
        if os.environ.get("OMP_NUM_THREADS", None) is None:
            raise RuntimeError(
                "'OMP_NUM_THREADS' environment variable not set. "
                "Please refer to the installation instructions. "
            )

        # det_num_threads
        if os.environ.get("det_num_threads", None) is None:
            warnings.warn(
                "'det_num_threads' environment variable not set. "
                "Please refer to the installation instructions. "
                "Training will not work properly.")

        # det_verbose
        if os.environ.get("det_verbose", None) is None:
            print("'det_verbose' environment variable not set. "
                  "Continuing in verbose mode.")

        return func(*args, **kwargs)
    return wrapper


def _check_key_missing(cfg: dict, key: str, ktype=None):
    if key not in cfg:
        raise ValueError(f"Dataset information did not contain "
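For context, a minimal usage sketch of the guard (not part of this commit; it assumes nnDetection is installed and `det_data`, `det_models`, and `OMP_NUM_THREADS` are exported in the shell):

from nndet.utils.check import env_guard

@env_guard
def main():
    # reached only if the required environment variables are set
    print("nnDetection environment looks good")

if __name__ == "__main__":
    main()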
46 changes: 0 additions & 46 deletions nndet/utils/info.py
@@ -33,52 +33,6 @@
from git import Repo, InvalidGitRepositoryError


def env_guard(func):
    """
    Contextmanager to check nnDetection environment variables
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # we use print here because logging might not be initialized yet and
        # this is intended as a user warning.

        # det_data
        if os.environ.get("det_data", None) is None:
            raise RuntimeError(
                "'det_data' environment variable not set. "
                "Please refer to the installation instructions. "
            )

        # det_models
        if os.environ.get("det_models", None) is None:
            raise RuntimeError(
                "'det_models' environment variable not set. "
                "Please refer to the installation instructions. "
            )

        # OMP_NUM_THREADS
        if os.environ.get("OMP_NUM_THREADS", None) is None:
            raise RuntimeError(
                "'OMP_NUM_THREADS' environment variable not set. "
                "Please refer to the installation instructions. "
            )

        # det_num_threads
        if os.environ.get("det_num_threads", None) is None:
            warnings.warn(
                "Warning: 'det_num_threads' environment variable not set. "
                "Please read installation instructions again. "
                "Training will not work properly.")

        # det_verbose
        if os.environ.get("det_verbose", None) is None:
            print("'det_verbose' environment variable not set. "
                  "Continue in verbose mode.")

        return func(*args, **kwargs)
    return wrapper


def get_requirements():
    """
    Get all installed packages from currently active environment
14 changes: 14 additions & 0 deletions projects/Task017_CADA/README.md
@@ -0,0 +1,14 @@
# CADA
**Disclaimer**: We are not the host of the data.
Please make sure to read the requirements and usage policies of the data before using it and **give credit to the authors of the dataset**!

Please read the information from the homepage carefully and follow the rules and instructions provided by the original authors when using the data.
- Homepage: https://cada.grand-challenge.org/Introduction/
- Subtask: Task 1 aneurysm detection

## Setup
0. Follow the installation instructions of nnDetection and create a data directory named `Task017_CADA`.
1. Follow the instructions and usage policies to download the data and place the data and labels at the following locations: data -> `Task017_CADA / raw / train_dataset` and labels -> `Task017_CADA / raw / train_mask_images` (see the layout check below).
2. Run `python prepare.py` in `projects / Task017_CADA / scripts` of the nnDetection repository.

The data is now prepared in the correct format and the instructions from the nnDetection README can be used to train the networks.
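To sanity-check step 1 before running `prepare.py`, a minimal sketch (it assumes `det_data` is exported; the error message is illustrative):

from pathlib import Path
import os

# raw layout expected by prepare.py (see step 1)
task_dir = Path(os.environ["det_data"]) / "Task017_CADA"
for sub in ("train_dataset", "train_mask_images"):
    folder = task_dir / "raw" / sub
    assert folder.is_dir(), f"expected raw folder missing: {folder}"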
69 changes: 69 additions & 0 deletions projects/Task017_CADA/scripts/prepare.py
@@ -0,0 +1,69 @@
import os
import shutil
from pathlib import Path

import SimpleITK as sitk
from tqdm import tqdm

from nndet.io import save_json
from nndet.utils.check import env_guard


def run_prep(source_data: Path, source_label: Path,
             target_data_dir: Path, target_label_dir: Path):
    case_id = source_data.stem.rsplit('_', 1)[0]

    shutil.copy(source_data, target_data_dir / f"{case_id}_0000.nii.gz")
    shutil.copy(source_label, target_label_dir / f"{case_id}.nii.gz")  # rename label file to match data
    label_itk = sitk.ReadImage(str(source_label))

    label_np = sitk.GetArrayFromImage(label_itk)
    # instance ids start at 1; every instance belongs to class 0 (aneurysm)
    instances = {int(_id + 1): 0 for _id in range(label_np.max())}
    save_json({"instances": instances}, target_label_dir / f"{case_id}")


@env_guard
def main():
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task017_CADA"

    # setup raw paths
    source_data_dir = task_data_dir / "raw" / "train_dataset"
    if not source_data_dir.is_dir():
        raise RuntimeError(f"{source_data_dir} should contain the raw data but does not exist.")
    source_label_dir = task_data_dir / "raw" / "train_mask_images"
    if not source_label_dir.is_dir():
        raise RuntimeError(f"{source_label_dir} should contain the raw labels but does not exist.")

    # setup raw splitted dirs
    target_data_dir = task_data_dir / "raw_splitted" / "imagesTr"
    target_data_dir.mkdir(exist_ok=True, parents=True)
    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"
    target_label_dir.mkdir(exist_ok=True, parents=True)

    # prepare dataset info
    meta = {
        "name": "CADA",
        "task": "Task017_CADA",
        "target_class": None,
        "test_labels": False,
        "labels": {"0": "aneurysm"},
        "modalities": {"0": "CT"},
        "dim": 3,
    }
    save_json(meta, task_data_dir / "dataset.json")

    # prepare data & label
    case_ids = [p.stem.rsplit('_', 1)[0] for p in source_data_dir.glob("*.nii.gz")]
    print(f"Found {len(case_ids)} case ids")
    for cid in tqdm(case_ids):
        run_prep(
            source_data=source_data_dir / f"{cid}_orig.nii.gz",
            source_label=source_label_dir / f"{cid}_labeledMasks.nii.gz",
            target_data_dir=target_data_dir,
            target_label_dir=target_label_dir,
        )


if __name__ == "__main__":
    main()
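As a small illustration of the annotation format produced by `run_prep` (hypothetical mask; the comprehension is the same as above):

import numpy as np

# hypothetical 1x2x2 mask carrying two instances (ids 1 and 2)
label_np = np.array([[[0, 1], [2, 0]]])
instances = {int(_id + 1): 0 for _id in range(label_np.max())}
assert instances == {1: 0, 2: 0}
# save_json then writes {"instances": {"1": 0, "2": 0}} for the case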
16 changes: 16 additions & 0 deletions projects/Task019_ADAM/README.md
@@ -0,0 +1,16 @@
# ADAM
**Disclaimer**: We are not the host of the data.
Please make sure to read the requirements and usage policies of the data before using it and **give credit to the authors of the dataset**!

Please read the information from the homepage carefully and follow the rules and instructions provided by the original authors when using the data.
- Homepage: http://adam.isi.uu.nl/
- Subtask: Task 1

## Setup
0. Follow the installation instructions of nnDetection and create a data directory named `Task019FG_ADAM`. We added FG to the ID to indicate that unruptured and ruptured aneurysms are treated as one class, i.e. we run foreground vs background detection without distinguishing between the classes.
1. Follow the instructions and usage policies to download the data and place it into `Task019FG_ADAM / raw / ADAM_release_subjs`.
2. Run `python prepare.py` in `projects / Task019_ADAM / scripts` of the nnDetection repository.
3. Run `python split.py` in `projects / Task019_ADAM / scripts` of the nnDetection repository.
4. [Info]: `split.py` automatically creates a patient-stratified split. We used a random split for our challenge submission; by renaming the provided split file in the `preprocessed` folder (see the sketch below), nnDetection will automatically create a random split.

The data is now prepared in the correct format and the instructions from the nnDetection README can be used to train the networks.
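A minimal sketch for step 4 (the file name and location come from `split.py` below; the replacement name is arbitrary):

from pathlib import Path
import os

preprocessed = Path(os.environ["det_data"]) / "Task019FG_ADAM" / "preprocessed"
# move the patient-stratified split aside; nnDetection then creates a random split
(preprocessed / "splits_final.pkl").rename(preprocessed / "splits_grouped.pkl")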
74 changes: 74 additions & 0 deletions projects/Task019_ADAM/scripts/prepare.py
@@ -0,0 +1,74 @@
import os
import shutil
from pathlib import Path

from tqdm import tqdm

from nndet.io import save_json
from nndet.io.prepare import instances_from_segmentation
from nndet.utils.check import env_guard


def run_prep_fg_v_bg(
        case_id: str,
        source_data: Path,
        target_data_dir: Path,
        target_label_dir: Path,
        struct="pre/struct_aligned.nii.gz",  # bias field corrected and aligned
        tof="pre/TOF.nii.gz",  # tof image
        ):
    struct_path = source_data / case_id / struct
    tof_path = source_data / case_id / tof
    mask_path = source_data / case_id / "aneurysms.nii.gz"

    shutil.copy(struct_path, target_data_dir / f"{case_id}_0000.nii.gz")
    shutil.copy(tof_path, target_data_dir / f"{case_id}_0001.nii.gz")
    instances_from_segmentation(mask_path,
                                target_label_dir,
                                fg_vs_bg=True,
                                file_name=case_id,
                                )


@env_guard
def main():
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task019FG_ADAM"

    # setup raw paths
    source_data_dir = task_data_dir / "raw" / "ADAM_release_subjs"
    if not source_data_dir.is_dir():
        raise RuntimeError(f"{source_data_dir} should contain the raw data but does not exist.")

    # setup raw splitted dirs
    target_data_dir = task_data_dir / "raw_splitted" / "imagesTr"
    target_data_dir.mkdir(exist_ok=True, parents=True)
    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"
    target_label_dir.mkdir(exist_ok=True, parents=True)

    # prepare dataset info
    meta = {
        "name": "ADAM",
        "task": "Task019FG_ADAM",
        "target_class": None,
        "test_labels": False,
        "labels": {"0": "Aneurysm"},  # since we are running FG vs BG this is not completely correct
        "modalities": {"0": "Structured", "1": "TOF"},
        "dim": 3,
    }
    save_json(meta, task_data_dir / "dataset.json")

    # prepare data
    case_ids = [p.stem for p in source_data_dir.iterdir() if p.is_dir()]
    print(f"Found {len(case_ids)} case ids")
    for cid in tqdm(case_ids):
        run_prep_fg_v_bg(
            case_id=cid,
            source_data=source_data_dir,
            target_data_dir=target_data_dir,
            target_label_dir=target_label_dir,
        )


if __name__ == "__main__":
    main()
41 changes: 41 additions & 0 deletions projects/Task019_ADAM/scripts/split.py
@@ -0,0 +1,41 @@
import os
from collections import OrderedDict
from pathlib import Path

import numpy as np
from sklearn.model_selection import GroupKFold

from nndet.utils.check import env_guard
from nndet.io import get_case_ids_from_dir, save_pickle


@env_guard
def main():
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task019FG_ADAM"

    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"
    splits_file_dir = task_data_dir / "preprocessed"
    splits_file_dir.mkdir(parents=True, exist_ok=True)
    splits_file = splits_file_dir / "splits_final.pkl"

    case_ids = sorted(get_case_ids_from_dir(target_label_dir, remove_modality=False))
    # ids sharing the numeric prefix belong to the same patient; a trailing
    # letter marks an additional scan, so strip it for patient-level grouping
    case_ids_pat = [c if c.isdigit() else c[:-1] for c in case_ids]
    case_ids_pat_unique = list(set(case_ids_pat))
    print(f"Found {len(case_ids_pat_unique)} unique patient ids.")

    splits = []
    kfold = GroupKFold(n_splits=5)
    for i, (train_idx, test_idx) in enumerate(kfold.split(case_ids, groups=case_ids_pat)):
        train_keys = np.array(case_ids)[train_idx]
        test_keys = np.array(case_ids)[test_idx]

        splits.append(OrderedDict())
        splits[-1]['train'] = train_keys
        splits[-1]['val'] = test_keys
        print(f"Generated split: {splits[-1]}")
    save_pickle(splits, splits_file)


if __name__ == '__main__':
    main()
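A self-contained illustration of the grouping rule above (hypothetical case ids): `GroupKFold` keeps all scans of one patient in the same fold, so no patient leaks across train and val.

import numpy as np
from sklearn.model_selection import GroupKFold

ids = np.array(["10001", "10001A", "10002", "10003", "10004", "10005"])
groups = [c if c.isdigit() else c[:-1] for c in ids]  # same rule as split.py
for train_idx, val_idx in GroupKFold(n_splits=3).split(ids, groups=groups):
    train_pat = {groups[i] for i in train_idx}
    val_pat = {groups[i] for i in val_idx}
    assert not train_pat & val_pat  # no patient appears on both sides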
2 changes: 1 addition & 1 deletion scripts/consolidate.py
@@ -22,7 +22,7 @@
from typing import Sequence

from loguru import logger
from nndet.utils.info import env_guard
from nndet.utils.check import env_guard
from omegaconf import OmegaConf

from nndet.ptmodule import MODULE_REGISTRY
2 changes: 1 addition & 1 deletion scripts/convert_cls2fg.py
@@ -25,7 +25,7 @@

from nndet.io import get_task, load_json, save_json
from nndet.utils.config import compose, load_dataset_info
from nndet.utils.info import env_guard
from nndet.utils.check import env_guard


def convert_raw(task, overwrite, ov):
2 changes: 1 addition & 1 deletion scripts/generate_example.py
@@ -26,7 +26,7 @@
from loguru import logger

from nndet.io import save_json
from nndet.utils.info import env_guard
from nndet.utils.check import env_guard


# # 2D example
2 changes: 1 addition & 1 deletion scripts/predict.py
@@ -23,7 +23,7 @@
from loguru import logger
from pathlib import Path

from nndet.utils.info import env_guard
from nndet.utils.check import env_guard
from nndet.planning import PLANNER_REGISTRY
from nndet.io import get_task, get_training_dir
from nndet.io.load import load_pickle
2 changes: 1 addition & 1 deletion scripts/preprocess.py
@@ -32,7 +32,7 @@
from omegaconf import OmegaConf

from nndet.utils.config import compose
from nndet.utils.info import env_guard
from nndet.utils.check import env_guard
from nndet.planning import DatasetAnalyzer
from nndet.planning import PLANNER_REGISTRY
from nndet.planning.experiment.utils import create_labels
3 changes: 2 additions & 1 deletion scripts/train.py
@@ -34,7 +34,8 @@
import nndet
from nndet.utils.config import compose, load_dataset_info
from nndet.utils.info import log_git, write_requirements_to_file, \
    create_debug_plan, flatten_mapping, env_guard
    create_debug_plan, flatten_mapping
from nndet.utils.check import env_guard
from nndet.utils.analysis import run_analysis_suite
from nndet.io.datamodule.bg_module import Datamodule
from nndet.io.paths import get_task, get_training_dir
