Merge branch 'dev' into update-cucim-dep

bhashemian · web-flow · commit c9a1ecc5039e · 2021-09-22T09:51:45.000-04:00
diff --git a/monai/data/dataset.py b/monai/data/dataset.py
@@ -575,6 +575,7 @@ def __init__(
         cache_rate: float = 1.0,
         num_workers: Optional[int] = None,
         progress: bool = True,
+        copy_cache: bool = True,
     ) -> None:
         """
         Args:
@@ -587,11 +588,16 @@ def __init__(
             num_workers: the number of worker processes to use.
                 If num_workers is None then the number returned by os.cpu_count() is used.
             progress: whether to display a progress bar.
+            copy_cache: whether to `deepcopy` the cache content before applying the random transforms,
+                defaults to `True`. If the random transforms don't modify the cache content
+                or every cache item is only used once in a `multi-processing` environment,
+                you may set `copy_cache=False` for better performance.
         """
         if not isinstance(transform, Compose):
             transform = Compose(transform)
         super().__init__(data=data, transform=transform)
         self.progress = progress
+        self.copy_cache = copy_cache
         self.cache_num = min(int(cache_num), int(len(data) * cache_rate), len(data))
         self.num_workers = num_workers
         if self.num_workers is not None:
@@ -656,7 +662,8 @@ def _transform(self, index: int):
                 # only need to deep copy data on first non-deterministic transform
                 if not start_run:
                     start_run = True
-                    data = deepcopy(data)
+                    if self.copy_cache:
+                        data = deepcopy(data)
                 data = apply_transform(_transform, data)
         return data
 
@@ -722,6 +729,10 @@ class SmartCacheDataset(Randomizable, CacheDataset):
         shuffle: whether to shuffle the whole data list before preparing the cache content for first epoch.
             it will not modify the original input data sequence in-place.
         seed: random seed if shuffle is `True`, default to `0`.
+        copy_cache: whether to `deepcopy` the cache content before applying the random transforms,
+            defaults to `True`. If the random transforms don't modify the cache content
+            or every cache item is only used once in a `multi-processing` environment,
+            you may set `copy_cache=False` for better performance.
     """
 
     def __init__(
@@ -736,14 +747,15 @@ def __init__(
         progress: bool = True,
         shuffle: bool = True,
         seed: int = 0,
+        copy_cache: bool = True,
     ) -> None:
         if shuffle:
             self.set_random_state(seed=seed)
             data = copy(data)
             self.randomize(data)
         self.shuffle = shuffle
 
-        super().__init__(data, transform, cache_num, cache_rate, num_init_workers, progress)
+        super().__init__(data, transform, cache_num, cache_rate, num_init_workers, progress, copy_cache)
         if self._cache is None:
             self._cache = self._fill_cache()
         if self.cache_num >= len(data):
diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py
@@ -130,12 +130,12 @@ class    mean    median    max    5percentile 95percentile  notnans
             if summary_ops is not None:
                 supported_ops = OrderedDict(
                     {
-                        "mean": lambda x: np.nanmean(x),
-                        "median": lambda x: np.nanmedian(x),
-                        "max": lambda x: np.nanmax(x),
-                        "min": lambda x: np.nanmin(x),
+                        "mean": np.nanmean,
+                        "median": np.nanmedian,
+                        "max": np.nanmax,
+                        "min": np.nanmin,
                         "90percentile": lambda x: np.nanpercentile(x[0], x[1]),
-                        "std": lambda x: np.nanstd(x),
+                        "std": np.nanstd,
                         "notnans": lambda x: (~np.isnan(x)).sum(),
                     }
                 )
@@ -149,7 +149,7 @@ def _compute_op(op: str, d: np.ndarray):
                         return c_op(d)
 
                     threshold = int(op.split("percentile")[0])
-                    return supported_ops["90percentile"]((d, threshold))
+                    return supported_ops["90percentile"]((d, threshold))  # type: ignore
 
                 with open(os.path.join(save_dir, f"{k}_summary.csv"), "w") as f:
                     f.write(f"class{deli}{deli.join(ops)}\n")
diff --git a/monai/transforms/compose.py b/monai/transforms/compose.py
@@ -204,14 +204,13 @@ def __init__(
     def _normalize_probabilities(self, weights):
         if len(weights) == 0:
             return weights
-        else:
-            weights = np.array(weights)
-            if np.any(weights < 0):
-                raise AssertionError("Probabilities must be greater than or equal to zero.")
-            if np.all(weights == 0):
-                raise AssertionError("At least one probability must be greater than zero.")
-            weights = weights / weights.sum()
-            return list(weights)
+        weights = np.array(weights)
+        if np.any(weights < 0):
+            raise AssertionError("Probabilities must be greater than or equal to zero.")
+        if np.all(weights == 0):
+            raise AssertionError("At least one probability must be greater than zero.")
+        weights = weights / weights.sum()
+        return list(weights)
 
     def flatten(self):
         transforms = []
@@ -232,16 +231,15 @@ def flatten(self):
     def __call__(self, data):
         if len(self.transforms) == 0:
             return data
-        else:
-            index = self.R.multinomial(1, self.weights).argmax()
-            _transform = self.transforms[index]
-            data = apply_transform(_transform, data, self.map_items, self.unpack_items)
-            # if the data is a mapping (dictionary), append the OneOf transform to the end
-            if isinstance(data, Mapping):
-                for key in data.keys():
-                    if key + InverseKeys.KEY_SUFFIX in data:
-                        self.push_transform(data, key, extra_info={"index": index})
-            return data
+        index = self.R.multinomial(1, self.weights).argmax()
+        _transform = self.transforms[index]
+        data = apply_transform(_transform, data, self.map_items, self.unpack_items)
+        # if the data is a mapping (dictionary), append the OneOf transform to the end
+        if isinstance(data, Mapping):
+            for key in data.keys():
+                if key + InverseKeys.KEY_SUFFIX in data:
+                    self.push_transform(data, key, extra_info={"index": index})
+        return data
 
     def inverse(self, data):
         if len(self.transforms) == 0:
diff --git a/monai/transforms/croppad/dictionary.py b/monai/transforms/croppad/dictionary.py
@@ -663,7 +663,6 @@ def __init__(
             random_size=random_size,
             allow_missing_keys=allow_missing_keys,
         )
-        MapTransform.__init__(self, keys, allow_missing_keys)
         self.roi_scale = roi_scale
         self.max_roi_scale = max_roi_scale
 
diff --git a/monai/transforms/intensity/array.py b/monai/transforms/intensity/array.py
@@ -1457,7 +1457,7 @@ def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor,
             raise RuntimeError("Image needs a channel direction.")
         if isinstance(self.loc[0], int) and len(img.shape) == 4 and len(self.loc) == 2:
             raise RuntimeError("Input images of dimension 4 need location tuple to be length 3 or 4")
-        if isinstance(self.loc[0], Sequence) and len(img.shape) == 4 and min(map(lambda x: len(x), self.loc)) == 2:
+        if isinstance(self.loc[0], Sequence) and len(img.shape) == 4 and min(map(len, self.loc)) == 2:
             raise RuntimeError("Input images of dimension 4 need location tuple to be length 3 or 4")
 
         n_dims = len(img.shape[1:])
diff --git a/monai/transforms/post/array.py b/monai/transforms/post/array.py
@@ -205,7 +205,7 @@ def __call__(
 
         rounding = self.rounding if rounding is None else rounding
         if rounding is not None:
-            rounding = look_up_option(rounding, ["torchrounding"])
+            look_up_option(rounding, ["torchrounding"])
             img = torch.round(img)
 
         return img.float()
diff --git a/monai/transforms/spatial/array.py b/monai/transforms/spatial/array.py
@@ -335,8 +335,7 @@ def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         if isinstance(img, np.ndarray):
             return np.ascontiguousarray(np.flip(img, map_spatial_axes(img.ndim, self.spatial_axis)))
-        else:
-            return torch.flip(img, map_spatial_axes(img.ndim, self.spatial_axis))
+        return torch.flip(img, map_spatial_axes(img.ndim, self.spatial_axis))
 
 
 class Resize(Transform):
diff --git a/monai/transforms/utility/array.py b/monai/transforms/utility/array.py
@@ -391,9 +391,8 @@ def __call__(self, data: NdarrayOrTensor):
         if self.data_type == "tensor":
             dtype_ = get_equivalent_dtype(self.dtype, torch.Tensor)
             return convert_to_tensor(data, dtype=dtype_, device=self.device)
-        else:
-            dtype_ = get_equivalent_dtype(self.dtype, np.ndarray)
-            return convert_to_numpy(data, dtype=dtype_)
+        dtype_ = get_equivalent_dtype(self.dtype, np.ndarray)
+        return convert_to_numpy(data, dtype=dtype_)
 
 
 class ToNumpy(Transform):
@@ -1091,11 +1090,11 @@ def __call__(
             img_ = img[mask]
 
         supported_ops = {
-            "mean": lambda x: np.nanmean(x),
-            "median": lambda x: np.nanmedian(x),
-            "max": lambda x: np.nanmax(x),
-            "min": lambda x: np.nanmin(x),
-            "std": lambda x: np.nanstd(x),
+            "mean": np.nanmean,
+            "median": np.nanmedian,
+            "max": np.nanmax,
+            "min": np.nanmin,
+            "std": np.nanstd,
         }
 
         def _compute(op: Callable, data: np.ndarray):
@@ -1107,7 +1106,7 @@ def _compute(op: Callable, data: np.ndarray):
         for o in self.ops:
             if isinstance(o, str):
                 o = look_up_option(o, supported_ops.keys())
-                meta_data[self.key_prefix + "_" + o] = _compute(supported_ops[o], img_)
+                meta_data[self.key_prefix + "_" + o] = _compute(supported_ops[o], img_)  # type: ignore
             elif callable(o):
                 meta_data[self.key_prefix + "_custom_" + str(custom_index)] = _compute(o, img_)
                 custom_index += 1
diff --git a/monai/transforms/utility/dictionary.py b/monai/transforms/utility/dictionary.py
@@ -15,7 +15,6 @@
 Class names are ended with 'd' to denote dictionary-based transforms.
 """
 
-import copy
 import logging
 import re
 from copy import deepcopy
@@ -886,7 +885,7 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N
                 if isinstance(val, torch.Tensor):
                     d[new_key] = val.detach().clone()
                 else:
-                    d[new_key] = copy.deepcopy(val)
+                    d[new_key] = deepcopy(val)
         return d
 
 
diff --git a/monai/transforms/utils.py b/monai/transforms/utils.py
@@ -20,12 +20,11 @@
 import torch
 
 import monai
-import monai.transforms.transform
 from monai.config import DtypeLike, IndexSelection
 from monai.config.type_definitions import NdarrayOrTensor
 from monai.networks.layers import GaussianFilter
 from monai.transforms.compose import Compose, OneOf
-from monai.transforms.transform import MapTransform, Transform
+from monai.transforms.transform import MapTransform, Transform, apply_transform
 from monai.transforms.utils_pytorch_numpy_unification import any_np_pt, nonzero, ravel, unravel_index
 from monai.utils import (
     GridSampleMode,
@@ -1330,9 +1329,7 @@ def _get_data(obj, key):
         prev_data = _get_data(test_data, key)
         prev_type = type(prev_data)
         prev_device = prev_data.device if isinstance(prev_data, torch.Tensor) else None
-        test_data = monai.transforms.transform.apply_transform(
-            _transform, test_data, transform.map_items, transform.unpack_items
-        )
+        test_data = apply_transform(_transform, test_data, transform.map_items, transform.unpack_items)
         # every time the type or device changes, increment the counter
         curr_data = _get_data(test_data, key)
         curr_device = curr_data.device if isinstance(curr_data, torch.Tensor) else None
diff --git a/monai/transforms/utils_pytorch_numpy_unification.py b/monai/transforms/utils_pytorch_numpy_unification.py
@@ -159,8 +159,7 @@ def floor_divide(a: NdarrayOrTensor, b) -> NdarrayOrTensor:
         if is_module_ver_at_least(torch, (1, 8, 0)):
             return torch.div(a, b, rounding_mode="floor")
         return torch.floor_divide(a, b)
-    else:
-        return np.floor_divide(a, b)
+    return np.floor_divide(a, b)
 
 
 def unravel_index(idx, shape):
diff --git a/tests/test_cachedataset.py b/tests/test_cachedataset.py
@@ -19,7 +19,7 @@
 from parameterized import parameterized
 
 from monai.data import CacheDataset, DataLoader, PersistentDataset, SmartCacheDataset
-from monai.transforms import Compose, Lambda, LoadImaged, ThreadUnsafe, Transform
+from monai.transforms import Compose, Lambda, LoadImaged, RandLambda, ThreadUnsafe, Transform
 from monai.utils import get_torch_version_tuple
 
 TEST_CASE_1 = [Compose([LoadImaged(keys=["image", "label", "extra"])]), (128, 128, 128)]
@@ -84,27 +84,36 @@ def test_shape(self, transform, expected_shape):
     def test_set_data(self):
         data_list1 = list(range(10))
 
-        transform = Lambda(func=lambda x: np.array([x * 10]))
+        transform = Compose(
+            [
+                Lambda(func=lambda x: np.array([x * 10])),
+                RandLambda(func=lambda x: x + 1),
+            ]
+        )
 
         dataset = CacheDataset(
             data=data_list1,
             transform=transform,
             cache_rate=1.0,
             num_workers=4,
             progress=True,
+            copy_cache=False if sys.platform == "linux" else True,
         )
 
         num_workers = 2 if sys.platform == "linux" else 0
         dataloader = DataLoader(dataset=dataset, num_workers=num_workers, batch_size=1)
         for i, d in enumerate(dataloader):
-            np.testing.assert_allclose([[data_list1[i] * 10]], d)
+            np.testing.assert_allclose([[data_list1[i] * 10 + 1]], d)
+        # simulate another epoch, the cache content should not be modified
+        for i, d in enumerate(dataloader):
+            np.testing.assert_allclose([[data_list1[i] * 10 + 1]], d)
 
         # update the datalist and fill the cache content
         data_list2 = list(range(-10, 0))
         dataset.set_data(data=data_list2)
         # rerun with updated cache content
         for i, d in enumerate(dataloader):
-            np.testing.assert_allclose([[data_list2[i] * 10]], d)
+            np.testing.assert_allclose([[data_list2[i] * 10 + 1]], d)
 
 
 class _StatefulTransform(Transform, ThreadUnsafe):
diff --git a/tests/test_intensity_stats.py b/tests/test_intensity_stats.py
@@ -31,7 +31,7 @@
 ]
 
 TEST_CASE_3 = [
-    {"ops": [lambda x: np.mean(x), "max", lambda x: np.min(x)], "key_prefix": "orig"},
+    {"ops": [np.mean, "max", np.min], "key_prefix": "orig"},
     np.array([[[0.0, 1.0], [2.0, 3.0]]]),
     None,
     {"orig_custom_0": 1.5, "orig_max": 3.0, "orig_custom_1": 0.0},
diff --git a/tests/test_intensity_statsd.py b/tests/test_intensity_statsd.py
@@ -34,7 +34,7 @@
 ]
 
 TEST_CASE_3 = [
-    {"keys": "img", "ops": [lambda x: np.mean(x), "max", lambda x: np.min(x)], "key_prefix": "orig"},
+    {"keys": "img", "ops": [np.mean, "max", np.min], "key_prefix": "orig"},
     {"img": np.array([[[0.0, 1.0], [2.0, 3.0]]])},
     "img_meta_dict",
     {"orig_custom_0": 1.5, "orig_max": 3.0, "orig_custom_1": 0.0},

Original file line number	Diff line number	Diff line change
`@@ -663,7 +663,6 @@ def __init__(`
`663`	`663`	`random_size=random_size,`
`664`	`664`	`allow_missing_keys=allow_missing_keys,`
`665`	`665`	`)`
`666`		`- MapTransform.__init__(self, keys, allow_missing_keys)`
`667`	`666`	`self.roi_scale = roi_scale`
`668`	`667`	`self.max_roi_scale = max_roi_scale`
`669`	`668`