diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fd2e3c5c6..ecb9b538e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added support for comma delimited multi-label targets to the `ImageClassifier` ([#997](https://github.com/PyTorchLightning/lightning-flash/pull/997)) +- Added `datapipeline_state` on dataset creation within the `from_*` methods from the `DataModule` ([#1018](https://github.com/PyTorchLightning/lightning-flash/pull/1018)) + ### Changed - Changed `DataSource` to `Input` ([#929](https://github.com/PyTorchLightning/lightning-flash/pull/929)) @@ -34,6 +36,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added `Output` suffix to `Preds`, `FiftyOneDetectionLabels`, `SegmentationLabels`, `FiftyOneDetectionLabels`, `DetectionLabels`, `Classes`, `FiftyOneLabels`, `Labels`, `Logits`, `Probabilities` ([#1011](https://github.com/PyTorchLightning/lightning-flash/pull/1011)) + +- Changed `from_files` and `from_folders` from `ObjectDetectionData`, `InstanceSegmentationData`, `KeypointDetectionData` to support only the `predicting` stage ([#1018](https://github.com/PyTorchLightning/lightning-flash/pull/1018)) + ### Deprecated - Deprecated `flash.core.data.process.Serializer` in favour of `flash.core.data.io.output.Output` ([#927](https://github.com/PyTorchLightning/lightning-flash/pull/927)) diff --git a/flash/core/integrations/icevision/data.py b/flash/core/integrations/icevision/data.py index a6b8cb9e92..6681c72a18 100644 --- a/flash/core/integrations/icevision/data.py +++ b/flash/core/integrations/icevision/data.py @@ -52,7 +52,7 @@ def load_data( def predict_load_data( self, paths: Union[str, List[str]], ann_file: Optional[str] = None, parser: Optional[Type["Parser"]] = None ) -> List[Dict[str, Any]]: - if parser is not None: + if parser is not None and parser != Parser: return self.load_data(paths, ann_file, parser) paths = 
list_valid_files(paths, valid_extensions=IMG_EXTENSIONS + NP_EXTENSIONS) return [{DataKeys.INPUT: path} for path in paths] diff --git a/flash/core/model.py b/flash/core/model.py index 073c9baa1c..23bf5df5b3 100644 --- a/flash/core/model.py +++ b/flash/core/model.py @@ -507,7 +507,9 @@ def predict( # Temporary fix to support new `Input` object input = data_pipeline._input_transform_pipeline.input_of_name(input or "default") - if inspect.isclass(input) and issubclass(input, NewInputBase): + if (inspect.isclass(input) and issubclass(input, NewInputBase)) or ( + isinstance(input, functools.partial) and issubclass(input.func, NewInputBase) + ): dataset = input(running_stage, x, data_pipeline_state=self._data_pipeline_state) else: dataset = input.generate_dataset(x, running_stage) diff --git a/flash/core/utilities/flash_cli.py b/flash/core/utilities/flash_cli.py index 77814dad6b..cd3bddd732 100644 --- a/flash/core/utilities/flash_cli.py +++ b/flash/core/utilities/flash_cli.py @@ -207,7 +207,8 @@ def add_arguments_to_parser(self, parser) -> None: ) or (not hasattr(DataModule, function) and not self.legacy) ): - self.add_subcommand_from_function(subcommands, getattr(self.local_datamodule_class, function)) + if getattr(self.local_datamodule_class, function) is not None: + self.add_subcommand_from_function(subcommands, getattr(self.local_datamodule_class, function)) for datamodule_builder in self.additional_datamodule_builders: self.add_subcommand_from_function(subcommands, datamodule_builder) diff --git a/flash/graph/classification/data.py b/flash/graph/classification/data.py index ed975b9ba6..3eb43de204 100644 --- a/flash/graph/classification/data.py +++ b/flash/graph/classification/data.py @@ -16,6 +16,7 @@ from torch.utils.data import Dataset from flash.core.data.data_module import DataModule +from flash.core.data.data_pipeline import DataPipelineState from flash.core.data.io.input import InputFormat from flash.core.data.io.input_transform import InputTransform from 
flash.core.utilities.imports import _GRAPH_AVAILABLE @@ -74,11 +75,14 @@ def from_datasets( predict_transform: Optional[Dict[str, Callable]] = None, **data_module_kwargs, ) -> "GraphClassificationData": + + dataset_kwargs = dict(data_pipeline_state=DataPipelineState()) + return cls( - GraphDatasetInput(RunningStage.TRAINING, train_dataset), - GraphDatasetInput(RunningStage.VALIDATING, val_dataset), - GraphDatasetInput(RunningStage.TESTING, test_dataset), - GraphDatasetInput(RunningStage.PREDICTING, predict_dataset), + GraphDatasetInput(RunningStage.TRAINING, train_dataset, **dataset_kwargs), + GraphDatasetInput(RunningStage.VALIDATING, val_dataset, **dataset_kwargs), + GraphDatasetInput(RunningStage.TESTING, test_dataset, **dataset_kwargs), + GraphDatasetInput(RunningStage.PREDICTING, predict_dataset, **dataset_kwargs), input_transform=cls.input_transform_cls( train_transform, val_transform, @@ -94,3 +98,11 @@ def num_features(self): n_cls_val = getattr(self.val_dataset, "num_features", None) n_cls_test = getattr(self.test_dataset, "num_features", None) return n_cls_train or n_cls_val or n_cls_test + + from_folders = None + from_files = None + from_tensors = None + from_numpy = None + from_json = None + from_csv = None + from_fiftyone = None diff --git a/flash/image/classification/data.py b/flash/image/classification/data.py index 5e4539a7ed..3f2d91f4c1 100644 --- a/flash/image/classification/data.py +++ b/flash/image/classification/data.py @@ -230,11 +230,14 @@ def from_files( image_size: Tuple[int, int] = (196, 196), **data_module_kwargs: Any, ) -> "ImageClassificationData": + + dataset_kwargs = dict(data_pipeline_state=DataPipelineState()) + return cls( - ImageClassificationFilesInput(RunningStage.TRAINING, train_files, train_targets), - ImageClassificationFilesInput(RunningStage.VALIDATING, val_files, val_targets), - ImageClassificationFilesInput(RunningStage.TESTING, test_files, test_targets), - ImageClassificationFilesInput(RunningStage.PREDICTING, 
predict_files), + ImageClassificationFilesInput(RunningStage.TRAINING, train_files, train_targets, **dataset_kwargs), + ImageClassificationFilesInput(RunningStage.VALIDATING, val_files, val_targets, **dataset_kwargs), + ImageClassificationFilesInput(RunningStage.TESTING, test_files, test_targets, **dataset_kwargs), + ImageClassificationFilesInput(RunningStage.PREDICTING, predict_files, **dataset_kwargs), input_transform=cls.input_transform_cls( train_transform, val_transform, @@ -259,11 +262,14 @@ def from_folders( image_size: Tuple[int, int] = (196, 196), **data_module_kwargs: Any, ) -> "ImageClassificationData": + + dataset_kwargs = dict(data_pipeline_state=DataPipelineState()) + return cls( - ImageClassificationFolderInput(RunningStage.TRAINING, train_folder), - ImageClassificationFolderInput(RunningStage.VALIDATING, val_folder), - ImageClassificationFolderInput(RunningStage.TESTING, test_folder), - ImageClassificationFolderInput(RunningStage.PREDICTING, predict_folder), + ImageClassificationFolderInput(RunningStage.TRAINING, train_folder, **dataset_kwargs), + ImageClassificationFolderInput(RunningStage.VALIDATING, val_folder, **dataset_kwargs), + ImageClassificationFolderInput(RunningStage.TESTING, test_folder, **dataset_kwargs), + ImageClassificationFolderInput(RunningStage.PREDICTING, predict_folder, **dataset_kwargs), input_transform=cls.input_transform_cls( train_transform, val_transform, @@ -291,11 +297,14 @@ def from_numpy( image_size: Tuple[int, int] = (196, 196), **data_module_kwargs: Any, ) -> "ImageClassificationData": + + dataset_kwargs = dict(data_pipeline_state=DataPipelineState()) + return cls( - ImageClassificationNumpyInput(RunningStage.TRAINING, train_data, train_targets), - ImageClassificationNumpyInput(RunningStage.VALIDATING, val_data, val_targets), - ImageClassificationNumpyInput(RunningStage.TESTING, test_data, test_targets), - ImageClassificationNumpyInput(RunningStage.PREDICTING, predict_data), + 
ImageClassificationNumpyInput(RunningStage.TRAINING, train_data, train_targets, **dataset_kwargs), + ImageClassificationNumpyInput(RunningStage.VALIDATING, val_data, val_targets, **dataset_kwargs), + ImageClassificationNumpyInput(RunningStage.TESTING, test_data, test_targets, **dataset_kwargs), + ImageClassificationNumpyInput(RunningStage.PREDICTING, predict_data, **dataset_kwargs), input_transform=cls.input_transform_cls( train_transform, val_transform, @@ -323,11 +332,14 @@ def from_tensors( image_size: Tuple[int, int] = (196, 196), **data_module_kwargs: Any, ) -> "ImageClassificationData": + + dataset_kwargs = dict(data_pipeline_state=DataPipelineState()) + return cls( - ImageClassificationTensorInput(RunningStage.TRAINING, train_data, train_targets), - ImageClassificationTensorInput(RunningStage.VALIDATING, val_data, val_targets), - ImageClassificationTensorInput(RunningStage.TESTING, test_data, test_targets), - ImageClassificationTensorInput(RunningStage.PREDICTING, predict_data), + ImageClassificationTensorInput(RunningStage.TRAINING, train_data, train_targets, **dataset_kwargs), + ImageClassificationTensorInput(RunningStage.VALIDATING, val_data, val_targets, **dataset_kwargs), + ImageClassificationTensorInput(RunningStage.TESTING, test_data, test_targets, **dataset_kwargs), + ImageClassificationTensorInput(RunningStage.PREDICTING, predict_data, **dataset_kwargs), input_transform=cls.input_transform_cls( train_transform, val_transform, @@ -362,23 +374,19 @@ def from_data_frame( image_size: Tuple[int, int] = (196, 196), **data_module_kwargs: Any, ) -> "ImageClassificationData": + + dataset_kwargs = dict(data_pipeline_state=DataPipelineState()) + + train_data = (train_data_frame, input_field, target_fields, train_images_root, train_resolver) + val_data = (val_data_frame, input_field, target_fields, val_images_root, val_resolver) + test_data = (test_data_frame, input_field, target_fields, test_images_root, test_resolver) + predict_data = (predict_data_frame, 
input_field, predict_images_root, predict_resolver) + return cls( - ImageClassificationDataFrameInput( - RunningStage.TRAINING, train_data_frame, input_field, target_fields, train_images_root, train_resolver - ), - ImageClassificationCSVInput( - RunningStage.VALIDATING, val_data_frame, input_field, target_fields, val_images_root, val_resolver - ), - ImageClassificationCSVInput( - RunningStage.TESTING, test_data_frame, input_field, target_fields, test_images_root, test_resolver - ), - ImageClassificationCSVInput( - RunningStage.PREDICTING, - predict_data_frame, - input_field, - root=predict_images_root, - resolver=predict_resolver, - ), + ImageClassificationCSVInput(RunningStage.TRAINING, *train_data, **dataset_kwargs), + ImageClassificationCSVInput(RunningStage.VALIDATING, *val_data, **dataset_kwargs), + ImageClassificationCSVInput(RunningStage.TESTING, *test_data, **dataset_kwargs), + ImageClassificationCSVInput(RunningStage.PREDICTING, *predict_data, **dataset_kwargs), input_transform=cls.input_transform_cls( train_transform, val_transform, @@ -413,19 +421,19 @@ def from_csv( image_size: Tuple[int, int] = (196, 196), **data_module_kwargs: Any, ) -> "ImageClassificationData": + + dataset_kwargs = dict(data_pipeline_state=DataPipelineState()) + + train_data = (train_file, input_field, target_fields, train_images_root, train_resolver) + val_data = (val_file, input_field, target_fields, val_images_root, val_resolver) + test_data = (test_file, input_field, target_fields, test_images_root, test_resolver) + predict_data = (predict_file, input_field, predict_images_root, predict_resolver) + return cls( - ImageClassificationCSVInput( - RunningStage.TRAINING, train_file, input_field, target_fields, train_images_root, train_resolver - ), - ImageClassificationCSVInput( - RunningStage.VALIDATING, val_file, input_field, target_fields, val_images_root, val_resolver - ), - ImageClassificationCSVInput( - RunningStage.TESTING, test_file, input_field, target_fields, 
test_images_root, test_resolver - ), - ImageClassificationCSVInput( - RunningStage.PREDICTING, predict_file, input_field, root=predict_images_root, resolver=predict_resolver - ), + ImageClassificationCSVInput(RunningStage.TRAINING, *train_data, **dataset_kwargs), + ImageClassificationCSVInput(RunningStage.VALIDATING, *val_data, **dataset_kwargs), + ImageClassificationCSVInput(RunningStage.TESTING, *test_data, **dataset_kwargs), + ImageClassificationCSVInput(RunningStage.PREDICTING, *predict_data, **dataset_kwargs), input_transform=cls.input_transform_cls( train_transform, val_transform, @@ -452,11 +460,14 @@ def from_fiftyone( image_size: Tuple[int, int] = (196, 196), **data_module_kwargs, ) -> "ImageClassificationData": + + dataset_kwargs = dict(data_pipeline_state=DataPipelineState()) + return cls( - ImageClassificationFiftyOneInput(RunningStage.TRAINING, train_dataset, label_field), - ImageClassificationFiftyOneInput(RunningStage.VALIDATING, val_dataset, label_field), - ImageClassificationFiftyOneInput(RunningStage.TESTING, test_dataset, label_field), - ImageClassificationFiftyOneInput(RunningStage.PREDICTING, predict_dataset, label_field), + ImageClassificationFiftyOneInput(RunningStage.TRAINING, train_dataset, label_field, **dataset_kwargs), + ImageClassificationFiftyOneInput(RunningStage.VALIDATING, val_dataset, label_field, **dataset_kwargs), + ImageClassificationFiftyOneInput(RunningStage.TESTING, test_dataset, label_field, **dataset_kwargs), + ImageClassificationFiftyOneInput(RunningStage.PREDICTING, predict_dataset, label_field, **dataset_kwargs), input_transform=cls.input_transform_cls( train_transform, val_transform, diff --git a/flash/image/detection/data.py b/flash/image/detection/data.py index a728ebc4be..ee5532d5dc 100644 --- a/flash/image/detection/data.py +++ b/flash/image/detection/data.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from functools import partial -from typing import Any, Callable, Dict, Hashable, Optional, Sequence, Tuple, Type, TYPE_CHECKING, Union +from typing import Any, Callable, Dict, Hashable, List, Optional, Sequence, Tuple, Type, TYPE_CHECKING, Union from flash.core.data.data_module import DataModule +from flash.core.data.data_pipeline import DataPipelineState from flash.core.data.io.input import DataKeys, InputFormat from flash.core.data.io.input_transform import InputTransform from flash.core.integrations.fiftyone.utils import FiftyOneLabelUtilities @@ -151,11 +152,12 @@ def __init__( "coco": partial(IceVisionInput, parser=COCOBBoxParser), "via": partial(IceVisionInput, parser=VIABBoxParser), "voc": partial(IceVisionInput, parser=VOCBBoxParser), + "icedata": partial(IceVisionInput, parser=parser), InputFormat.FILES: IceVisionInput, InputFormat.FOLDERS: partial(IceVisionInput, parser=parser), InputFormat.FIFTYONE: ObjectDetectionFiftyOneInput, }, - default_input=InputFormat.FILES, + default_input="icedata", ) self._default_collate = self._identity @@ -179,7 +181,7 @@ class ObjectDetectionData(DataModule): input_transform_cls = ObjectDetectionInputTransform @classmethod - def from_folders( + def from_icedata( cls, train_folder: Optional[str] = None, train_ann_file: Optional[str] = None, @@ -196,11 +198,14 @@ def from_folders( parser: Optional[Union[Callable, Type[Parser]]] = None, **data_module_kwargs, ) -> "ObjectDetectionData": + + dataset_kwargs = dict(parser=parser, data_pipeline_state=DataPipelineState()) + return cls( - IceVisionInput(RunningStage.TRAINING, train_folder, train_ann_file, parser=parser), - IceVisionInput(RunningStage.VALIDATING, val_folder, val_ann_file, parser=parser), - IceVisionInput(RunningStage.TESTING, test_folder, test_ann_file, parser=parser), - IceVisionInput(RunningStage.PREDICTING, predict_folder, parser=parser), + IceVisionInput(RunningStage.TRAINING, train_folder, train_ann_file, **dataset_kwargs), + 
IceVisionInput(RunningStage.VALIDATING, val_folder, val_ann_file, **dataset_kwargs), + IceVisionInput(RunningStage.TESTING, test_folder, test_ann_file, **dataset_kwargs), + IceVisionInput(RunningStage.PREDICTING, predict_folder, **dataset_kwargs), input_transform=cls.input_transform_cls( train_transform, val_transform, @@ -249,7 +254,7 @@ def from_coco( :class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms. image_size: The size to resize images (and their bounding boxes) to. """ - return cls.from_folders( + return cls.from_icedata( train_folder=train_folder, train_ann_file=train_ann_file, val_folder=val_folder, @@ -304,7 +309,7 @@ def from_voc( :class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms. image_size: The size to resize images (and their bounding boxes) to. """ - return cls.from_folders( + return cls.from_icedata( train_folder=train_folder, train_ann_file=train_ann_file, val_folder=val_folder, @@ -359,7 +364,7 @@ def from_via( :class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms. image_size: The size to resize images (and their bounding boxes) to. """ - return cls.from_folders( + return cls.from_icedata( train_folder=train_folder, train_ann_file=train_ann_file, val_folder=val_folder, @@ -407,3 +412,65 @@ def from_fiftyone( ), **data_module_kwargs, ) + + @classmethod + def from_folders( + cls, + predict_folder: Optional[str] = None, + predict_transform: Optional[Dict[str, Callable]] = None, + image_size: Tuple[int, int] = (128, 128), + **data_module_kwargs: Any, + ) -> "DataModule": + """Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from the given data folders. This is currently supported only for the predicting stage. + + Args: + predict_folder: The folder containing the predict data. 
+ predict_transform: The dictionary of transforms to use during predicting which maps :class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms. + data_module_kwargs: The keyword arguments for creating the datamodule. + + Returns: + The constructed data module. + """ + return cls( + predict_dataset=IceVisionInput(RunningStage.PREDICTING, predict_folder), + input_transform=cls.input_transform_cls( + predict_transform=predict_transform, + image_size=image_size, + ), + **data_module_kwargs, + ) + + @classmethod + def from_files( + cls, + predict_files: Optional[List[str]] = None, + predict_transform: Optional[Dict[str, Callable]] = None, + image_size: Tuple[int, int] = (128, 128), + **data_module_kwargs: Any, + ) -> "DataModule": + """Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from the given data files. + + This is currently supported only for the predicting stage. + + Args: + predict_files: The list of files containing the predict data. + predict_transform: The dictionary of transforms to use during predicting which maps :class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms. + data_module_kwargs: The keyword arguments for creating the datamodule. + + Returns: + The constructed data module. 
+ """ + return cls( + predict_dataset=IceVisionInput(RunningStage.PREDICTING, predict_files), + input_transform=cls.input_transform_cls( + predict_transform=predict_transform, + image_size=image_size, + ), + **data_module_kwargs, + ) + + from_tensor = None + from_json = None + from_csv = None + from_datasets = None diff --git a/flash/image/instance_segmentation/cli.py b/flash/image/instance_segmentation/cli.py index 64838c282b..61f87b52b8 100644 --- a/flash/image/instance_segmentation/cli.py +++ b/flash/image/instance_segmentation/cli.py @@ -26,6 +26,13 @@ @requires(["image", "icedata"]) def from_pets( + train_folder: Optional[str] = None, + train_ann_file: Optional[str] = None, + val_folder: Optional[str] = None, + val_ann_file: Optional[str] = None, + test_folder: Optional[str] = None, + test_ann_file: Optional[str] = None, + predict_folder: Optional[str] = None, val_split: float = 0.1, image_size: Tuple[int, int] = (128, 128), parser: Optional[Callable] = None, @@ -37,11 +44,17 @@ def from_pets( if parser is None: parser = partial(icedata.pets.parser, mask=True) - return InstanceSegmentationData.from_folders( - train_folder=data_dir, - val_split=val_split, + return InstanceSegmentationData.from_icedata( + train_folder=train_folder or data_dir, + train_ann_file=train_ann_file, + val_folder=val_folder, + val_ann_file=val_ann_file, + test_folder=test_folder, + test_ann_file=test_ann_file, + predict_folder=predict_folder, image_size=image_size, parser=parser, + val_split=val_split, **data_module_kwargs, ) diff --git a/flash/image/instance_segmentation/data.py b/flash/image/instance_segmentation/data.py index 63c99adde8..5b975edd43 100644 --- a/flash/image/instance_segmentation/data.py +++ b/flash/image/instance_segmentation/data.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from functools import partial -from typing import Any, Callable, Dict, Optional, Tuple, Type, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union from flash.core.data.data_module import DataModule from flash.core.data.io.input import DataKeys, InputFormat @@ -51,10 +51,11 @@ def __init__( inputs={ "coco": partial(IceVisionInput, parser=COCOMaskParser), "voc": partial(IceVisionInput, parser=VOCMaskParser), + "icedata": partial(IceVisionInput, parser=Parser), InputFormat.FILES: IceVisionInput, - InputFormat.FOLDERS: partial(IceVisionInput, parser=parser), + InputFormat.FOLDERS: IceVisionInput, }, - default_input=InputFormat.FILES, + default_input="icedata", ) self._default_collate = self._identity @@ -85,7 +86,7 @@ class InstanceSegmentationData(DataModule): output_transform_cls = InstanceSegmentationOutputTransform @classmethod - def from_folders( + def from_icedata( cls, train_folder: Optional[str] = None, train_ann_file: Optional[str] = None, @@ -155,7 +156,7 @@ def from_coco( :class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms. image_size: The size to resize images (and their masks) to. """ - return cls.from_folders( + return cls.from_icedata( train_folder=train_folder, train_ann_file=train_ann_file, val_folder=val_folder, @@ -210,7 +211,7 @@ def from_voc( :class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms. image_size: The size to resize images (and their masks) to. 
""" - return cls.from_folders( + return cls.from_icedata( train_folder=train_folder, train_ann_file=train_ann_file, val_folder=val_folder, @@ -226,3 +227,66 @@ def from_voc( parser=VOCMaskParser, **data_module_kwargs, ) + + @classmethod + def from_folders( + cls, + predict_folder: Optional[str] = None, + predict_transform: Optional[Dict[str, Callable]] = None, + image_size: Tuple[int, int] = (128, 128), + **data_module_kwargs: Any, + ) -> "DataModule": + """Creates a :class:`~flash.core.data.data_module.DataModule` object from the given folders. + + This is supported only for the predicting stage. + + Args: + predict_folder: The folder containing the predict data. + predict_transform: The dictionary of transforms to use during predicting which maps + data_module_kwargs: The keywords arguments for creating the datamodule. + + Returns: + The constructed data module. + """ + return cls( + predict_dataset=IceVisionInput(RunningStage.PREDICTING, predict_folder), + input_transform=cls.input_transform_cls( + predict_transform=predict_transform, + image_size=image_size, + ), + **data_module_kwargs, + ) + + @classmethod + def from_files( + cls, + predict_files: Optional[List[str]] = None, + predict_transform: Optional[Dict[str, Callable]] = None, + image_size: Tuple[int, int] = (128, 128), + **data_module_kwargs: Any, + ) -> "DataModule": + """Creates a :class:`~flash.core.data.data_module.DataModule` object from the given a list of files. + + This is supported only for the predicting stage. + + Args: + predict_files: The list of files containing the predict data. + predict_transform: The dictionary of transforms to use during predicting which maps + data_module_kwargs: The keywords arguments for creating the datamodule. + + Returns: + The constructed data module. 
+ """ + return cls( + predict_dataset=IceVisionInput(RunningStage.PREDICTING, predict_files), + input_transform=cls.input_transform_cls( + predict_transform=predict_transform, + image_size=image_size, + ), + **data_module_kwargs, + ) + + from_tensor = None + from_json = None + from_csv = None + from_datasets = None diff --git a/flash/image/keypoint_detection/cli.py b/flash/image/keypoint_detection/cli.py index 6f3998719f..c6c6f96d00 100644 --- a/flash/image/keypoint_detection/cli.py +++ b/flash/image/keypoint_detection/cli.py @@ -25,6 +25,13 @@ @requires("image") def from_biwi( + train_folder: Optional[str] = None, + train_ann_file: Optional[str] = None, + val_folder: Optional[str] = None, + val_ann_file: Optional[str] = None, + test_folder: Optional[str] = None, + test_ann_file: Optional[str] = None, + predict_folder: Optional[str] = None, val_split: float = 0.1, image_size: Tuple[int, int] = (128, 128), parser: Optional[Callable] = None, @@ -36,8 +43,14 @@ def from_biwi( if parser is None: parser = icedata.biwi.parser - return KeypointDetectionData.from_folders( - train_folder=data_dir, + return KeypointDetectionData.from_icedata( + train_folder=train_folder or data_dir, + train_ann_file=train_ann_file, + val_folder=val_folder, + val_ann_file=val_ann_file, + test_folder=test_folder, + test_ann_file=test_ann_file, + predict_folder=predict_folder, val_split=val_split, image_size=image_size, parser=parser, diff --git a/flash/image/keypoint_detection/data.py b/flash/image/keypoint_detection/data.py index 7b7260b3ad..aad223ec36 100644 --- a/flash/image/keypoint_detection/data.py +++ b/flash/image/keypoint_detection/data.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from functools import partial -from typing import Any, Callable, Dict, Optional, Tuple, Type, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union from flash.core.data.data_module import DataModule +from flash.core.data.data_pipeline import DataPipelineState from flash.core.data.io.input import InputFormat from flash.core.data.io.input_transform import InputTransform from flash.core.integrations.icevision.data import IceVisionInput @@ -75,7 +76,7 @@ class KeypointDetectionData(DataModule): input_transform_cls = KeypointDetectionInputTransform @classmethod - def from_folders( + def from_icedata( cls, train_folder: Optional[str] = None, train_ann_file: Optional[str] = None, @@ -92,11 +93,14 @@ def from_folders( parser: Optional[Union[Callable, Type[Parser]]] = None, **data_module_kwargs, ) -> "KeypointDetectionData": + + dataset_kwargs = dict(parser=parser, data_pipeline_state=DataPipelineState()) + return cls( - IceVisionInput(RunningStage.TRAINING, train_folder, train_ann_file, parser=parser), - IceVisionInput(RunningStage.VALIDATING, val_folder, val_ann_file, parser=parser), - IceVisionInput(RunningStage.TESTING, test_folder, test_ann_file, parser=parser), - IceVisionInput(RunningStage.PREDICTING, predict_folder, parser=parser), + IceVisionInput(RunningStage.TRAINING, train_folder, train_ann_file, **dataset_kwargs), + IceVisionInput(RunningStage.VALIDATING, val_folder, val_ann_file, **dataset_kwargs), + IceVisionInput(RunningStage.TESTING, test_folder, test_ann_file, **dataset_kwargs), + IceVisionInput(RunningStage.PREDICTING, predict_folder, **dataset_kwargs), input_transform=cls.input_transform_cls( train_transform, val_transform, @@ -145,7 +149,7 @@ def from_coco( :class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms. image_size: The size to resize images (and rescale their keypoints) to. 
""" - return cls.from_folders( + return cls.from_icedata( train_folder=train_folder, train_ann_file=train_ann_file, val_folder=val_folder, @@ -161,3 +165,66 @@ def from_coco( parser=COCOKeyPointsParser, **data_module_kwargs, ) + + @classmethod + def from_folders( + cls, + predict_folder: Optional[str] = None, + predict_transform: Optional[Dict[str, Callable]] = None, + image_size: Tuple[int, int] = (128, 128), + **data_module_kwargs: Any, + ) -> "DataModule": + """Creates a :class:`~flash.core.data.data_module.DataModule` object from the given folders. + + This is supported only for the predicting stage. + + Args: + predict_folder: The folder containing the predict data. + predict_transform: The dictionary of transforms to use during predicting which maps + data_module_kwargs: The keywords arguments for creating the datamodule. + + Returns: + The constructed data module. + """ + return cls( + predict_dataset=IceVisionInput(RunningStage.PREDICTING, predict_folder), + input_transform=cls.input_transform_cls( + predict_transform=predict_transform, + image_size=image_size, + ), + **data_module_kwargs, + ) + + @classmethod + def from_files( + cls, + predict_files: Optional[List[str]] = None, + predict_transform: Optional[Dict[str, Callable]] = None, + image_size: Tuple[int, int] = (128, 128), + **data_module_kwargs: Any, + ) -> "DataModule": + """Creates a :class:`~flash.core.data.data_module.DataModule` object from the given a list of files. + + This is supported only for the predicting stage. + + Args: + predict_files: The list of files containing the predict data. + predict_transform: The dictionary of transforms to use during predicting which maps + data_module_kwargs: The keywords arguments for creating the datamodule. + + Returns: + The constructed data module. 
+ """ + return cls( + predict_dataset=IceVisionInput(RunningStage.PREDICTING, predict_files), + input_transform=cls.input_transform_cls( + predict_transform=predict_transform, + image_size=image_size, + ), + **data_module_kwargs, + ) + + from_tensor = None + from_json = None + from_csv = None + from_datasets = None diff --git a/flash/tabular/data.py b/flash/tabular/data.py index 91ccef1043..a01622f999 100644 --- a/flash/tabular/data.py +++ b/flash/tabular/data.py @@ -19,6 +19,7 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException from flash.core.data.data_module import DataModule +from flash.core.data.data_pipeline import DataPipelineState from flash.core.data.io.classification_input import ClassificationState from flash.core.data.io.input import DataKeys, InputFormat from flash.core.data.io.input_base import Input @@ -313,6 +314,9 @@ def from_data_frame( predict_transform: Optional[Dict[str, Callable]] = None, **data_module_kwargs: Any, ) -> "TabularData": + + data_pipeline_state = DataPipelineState() + train_input = TabularDataFrameInput( RunningStage.TRAINING, train_data_frame, @@ -320,13 +324,18 @@ def from_data_frame( numerical_fields=numerical_fields, target_field=target_fields, is_regression=cls.is_regression, + data_pipeline_state=data_pipeline_state, ) - parameters = train_input.parameters if train_input else parameters + + dataset_kwargs = dict( + data_pipeline_state=data_pipeline_state, parameters=train_input.parameters if train_input else parameters + ) + return cls( train_input, - TabularDataFrameInput(RunningStage.VALIDATING, val_data_frame, parameters=parameters), - TabularDataFrameInput(RunningStage.TESTING, test_data_frame, parameters=parameters), - TabularDataFrameInput(RunningStage.PREDICTING, predict_data_frame, parameters=parameters), + TabularDataFrameInput(RunningStage.VALIDATING, val_data_frame, **dataset_kwargs), + TabularDataFrameInput(RunningStage.TESTING, test_data_frame, **dataset_kwargs), + 
TabularDataFrameInput(RunningStage.PREDICTING, predict_data_frame, **dataset_kwargs), input_transform=cls.input_transform_cls(train_transform, val_transform, test_transform, predict_transform), **data_module_kwargs, ) @@ -348,6 +357,9 @@ def from_csv( predict_transform: Optional[Dict[str, Callable]] = None, **data_module_kwargs: Any, ) -> "TabularData": + + data_pipeline_state = DataPipelineState() + train_input = TabularCSVInput( RunningStage.TRAINING, train_file, @@ -355,13 +367,18 @@ def from_csv( numerical_fields=numerical_fields, target_field=target_fields, is_regression=cls.is_regression, + data_pipeline_state=data_pipeline_state, ) - parameters = train_input.parameters if train_input else parameters + + dataset_kwargs = dict( + data_pipeline_state=data_pipeline_state, parameters=train_input.parameters if train_input else parameters + ) + return cls( train_input, - TabularCSVInput(RunningStage.VALIDATING, val_file, parameters=parameters), - TabularCSVInput(RunningStage.TESTING, test_file, parameters=parameters), - TabularCSVInput(RunningStage.PREDICTING, predict_file, parameters=parameters), + TabularCSVInput(RunningStage.VALIDATING, val_file, **dataset_kwargs), + TabularCSVInput(RunningStage.TESTING, test_file, **dataset_kwargs), + TabularCSVInput(RunningStage.PREDICTING, predict_file, **dataset_kwargs), input_transform=cls.input_transform_cls(train_transform, val_transform, test_transform, predict_transform), **data_module_kwargs, ) diff --git a/flash/tabular/forecasting/data.py b/flash/tabular/forecasting/data.py index a10ae91c01..1e0d7456c2 100644 --- a/flash/tabular/forecasting/data.py +++ b/flash/tabular/forecasting/data.py @@ -19,6 +19,7 @@ from torch.utils.data.sampler import Sampler from flash.core.data.data_module import DataModule +from flash.core.data.data_pipeline import DataPipelineState from flash.core.data.io.input import DataKeys, InputFormat from flash.core.data.io.input_base import Input from flash.core.data.io.input_transform import 
InputTransform @@ -161,6 +162,9 @@ def from_data_frame( :attr:`~flash.tabular.forecasting.data.TabularForecastingData.parameters` attribute of the :class:`~flash.tabular.forecasting.data.TabularForecastingData` object that contains your training data. """ + + data_pipeline_state = DataPipelineState() + train_input = TabularForecastingDataFrameInput( RunningStage.TRAINING, train_data_frame, @@ -168,13 +172,17 @@ def from_data_frame( group_ids=group_ids, target=target, **time_series_dataset_kwargs, + data_pipeline_state=data_pipeline_state, + ) + + dataset_kwargs = dict( + data_pipeline_state=data_pipeline_state, parameters=train_input.parameters if train_input else parameters ) - parameters = train_input.parameters if train_input else parameters return cls( train_input, - TabularForecastingDataFrameInput(RunningStage.VALIDATING, val_data_frame, parameters=parameters), - TabularForecastingDataFrameInput(RunningStage.TESTING, test_data_frame, parameters=parameters), - TabularForecastingDataFrameInput(RunningStage.PREDICTING, predict_data_frame, parameters=parameters), + TabularForecastingDataFrameInput(RunningStage.VALIDATING, val_data_frame, **dataset_kwargs), + TabularForecastingDataFrameInput(RunningStage.TESTING, test_data_frame, **dataset_kwargs), + TabularForecastingDataFrameInput(RunningStage.PREDICTING, predict_data_frame, **dataset_kwargs), input_transform=cls.input_transform_cls(train_transform, val_transform, test_transform, predict_transform), val_split=val_split, batch_size=batch_size, diff --git a/flash/video/classification/data.py b/flash/video/classification/data.py index 4555ae2c90..e3bdabdad5 100644 --- a/flash/video/classification/data.py +++ b/flash/video/classification/data.py @@ -374,6 +374,7 @@ def from_files( video_sampler=video_sampler, decode_audio=decode_audio, decoder=decoder, + data_pipeline_state=DataPipelineState(), ) return cls( VideoClassificationFilesInput(RunningStage.TRAINING, train_files, train_targets, **dataset_kwargs), @@ -415,6 
+416,7 @@ def from_folders( video_sampler=video_sampler, decode_audio=decode_audio, decoder=decoder, + data_pipeline_state=DataPipelineState(), ) return cls( VideoClassificationFoldersInput(RunningStage.TRAINING, train_folder, **dataset_kwargs), @@ -459,6 +461,7 @@ def from_fiftyone( decode_audio=decode_audio, decoder=decoder, label_field=label_field, + data_pipeline_state=DataPipelineState(), ) return cls( VideoClassificationFiftyOneInput(RunningStage.TRAINING, train_dataset, **dataset_kwargs), diff --git a/tests/audio/classification/test_data.py b/tests/audio/classification/test_data.py index fac7ad14fc..e04d3e1dc1 100644 --- a/tests/audio/classification/test_data.py +++ b/tests/audio/classification/test_data.py @@ -18,6 +18,7 @@ import pytest import torch import torch.nn as nn +from pytorch_lightning import seed_everything from flash.audio import AudioClassificationData from flash.core.data.io.input import DataKeys @@ -320,6 +321,7 @@ def test_from_folders_only_train(tmpdir): @pytest.mark.skipif(not _AUDIO_TESTING, reason="audio libraries aren't installed.") def test_from_folders_train_val(tmpdir): + seed_everything(42) train_dir = Path(tmpdir / "train") train_dir.mkdir() @@ -343,6 +345,7 @@ def test_from_folders_train_val(tmpdir): imgs, labels = data["input"], data["target"] assert imgs.shape == (2, 3, 128, 128) assert labels.shape == (2,) + assert list(labels.numpy()) == [0, 1] data = next(iter(spectrograms_data.val_dataloader())) imgs, labels = data["input"], data["target"] diff --git a/tests/image/classification/test_data.py b/tests/image/classification/test_data.py index 6a9271174f..9328908c91 100644 --- a/tests/image/classification/test_data.py +++ b/tests/image/classification/test_data.py @@ -19,6 +19,7 @@ import pytest import torch import torch.nn as nn +from pytorch_lightning import seed_everything from flash.core.data.io.input import DataKeys from flash.core.data.transforms import ApplyToKeys, merge_transforms @@ -274,6 +275,7 @@ def 
test_from_folders_only_train(tmpdir): @pytest.mark.skipif(not _IMAGE_TESTING, reason="image libraries aren't installed.") def test_from_folders_train_val(tmpdir): + seed_everything(42) train_dir = Path(tmpdir / "train") train_dir.mkdir() @@ -286,7 +288,7 @@ def test_from_folders_train_val(tmpdir): _rand_image().save(train_dir / "b" / "1.png") _rand_image().save(train_dir / "b" / "2.png") img_data = ImageClassificationData.from_folders( - train_dir, + train_folder=train_dir, val_folder=train_dir, test_folder=train_dir, batch_size=2, @@ -297,6 +299,7 @@ def test_from_folders_train_val(tmpdir): imgs, labels = data["input"], data["target"] assert imgs.shape == (2, 3, 196, 196) assert labels.shape == (2,) + assert list(labels.numpy()) == [0, 1] data = next(iter(img_data.val_dataloader())) imgs, labels = data["input"], data["target"] diff --git a/tests/image/detection/test_data.py b/tests/image/detection/test_data.py index 875d0d9711..b9395dc67d 100644 --- a/tests/image/detection/test_data.py +++ b/tests/image/detection/test_data.py @@ -102,6 +102,27 @@ def _create_synth_coco_dataset(tmpdir): return train_folder, coco_ann_path +def _create_synth_folders_dataset(tmpdir): + + predict = Path(tmpdir / "predict") + predict.mkdir() + + (predict / "images").mkdir() + Image.new("RGB", (224, 224)).save(predict / "images" / "sample_one.png") + Image.new("RGB", (224, 224)).save(predict / "images" / "sample_two.png") + + predict_folder = os.fspath(Path(predict / "images")) + + return predict_folder + + +def _create_synth_files_dataset(tmpdir): + + predict_folder = _create_synth_folders_dataset(tmpdir) + + return [os.path.join(predict_folder, f) for f in os.listdir(predict_folder)] + + def _create_synth_fiftyone_dataset(tmpdir): img_dir = Path(tmpdir / "fo_imgs") img_dir.mkdir() @@ -206,3 +227,33 @@ def test_image_detector_data_from_fiftyone(tmpdir): data = next(iter(datamodule.test_dataloader())) sample = data[0] assert sample[DataKeys.INPUT].shape == (128, 128, 3) + + 
+@pytest.mark.skipif(not _IMAGE_AVAILABLE, reason="image libraries aren't installed.") +@pytest.mark.skipif(not _COCO_AVAILABLE, reason="pycocotools is not installed for testing") +def test_image_detector_data_from_files(tmpdir): + + predict_files = _create_synth_files_dataset(tmpdir) + datamodule = ObjectDetectionData.from_files(predict_files=predict_files, batch_size=1, image_size=(128, 128)) + data = next(iter(datamodule.predict_dataloader())) + sample = data[0] + assert sample[DataKeys.INPUT].shape == (128, 128, 3) + + +@pytest.mark.skipif(not _IMAGE_AVAILABLE, reason="image libraries aren't installed.") +@pytest.mark.skipif(not _COCO_AVAILABLE, reason="pycocotools is not installed for testing") +def test_image_detector_data_from_folders(tmpdir): + + predict_folder = _create_synth_folders_dataset(tmpdir) + datamodule = ObjectDetectionData.from_folders(predict_folder=predict_folder, batch_size=1, image_size=(128, 128)) + data = next(iter(datamodule.predict_dataloader())) + sample = data[0] + assert sample[DataKeys.INPUT].shape == (128, 128, 3) + + +def test_data_non_supported(): + + assert not ObjectDetectionData.from_tensor + assert not ObjectDetectionData.from_json + assert not ObjectDetectionData.from_csv + assert not ObjectDetectionData.from_datasets diff --git a/tests/image/instance_segmentation/test_data.py b/tests/image/instance_segmentation/test_data.py new file mode 100644 index 0000000000..67ce71db82 --- /dev/null +++ b/tests/image/instance_segmentation/test_data.py @@ -0,0 +1,51 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest + +from flash.core.data.io.input import DataKeys +from flash.core.utilities.imports import _COCO_AVAILABLE, _IMAGE_AVAILABLE +from flash.image.instance_segmentation import InstanceSegmentationData +from tests.image.detection.test_data import _create_synth_files_dataset, _create_synth_folders_dataset + + +@pytest.mark.skipif(not _IMAGE_AVAILABLE, reason="image libraries aren't installed.") +@pytest.mark.skipif(not _COCO_AVAILABLE, reason="pycocotools is not installed for testing") +def test_image_detector_data_from_files(tmpdir): + + predict_files = _create_synth_files_dataset(tmpdir) + datamodule = InstanceSegmentationData.from_files(predict_files=predict_files, batch_size=1, image_size=(128, 128)) + data = next(iter(datamodule.predict_dataloader())) + sample = data[0] + assert sample[DataKeys.INPUT].shape == (128, 128, 3) + + +@pytest.mark.skipif(not _IMAGE_AVAILABLE, reason="image libraries aren't installed.") +@pytest.mark.skipif(not _COCO_AVAILABLE, reason="pycocotools is not installed for testing") +def test_image_detector_data_from_folders(tmpdir): + + predict_folder = _create_synth_folders_dataset(tmpdir) + datamodule = InstanceSegmentationData.from_folders( + predict_folder=predict_folder, batch_size=1, image_size=(128, 128) + ) + data = next(iter(datamodule.predict_dataloader())) + sample = data[0] + assert sample[DataKeys.INPUT].shape == (128, 128, 3) + + +def test_data_non_supported(): + + assert not InstanceSegmentationData.from_tensor + assert not InstanceSegmentationData.from_json + assert not InstanceSegmentationData.from_csv + 
assert not InstanceSegmentationData.from_datasets diff --git a/tests/image/instance_segmentation/test_model.py b/tests/image/instance_segmentation/test_model.py index 052b3a5884..d432889639 100644 --- a/tests/image/instance_segmentation/test_model.py +++ b/tests/image/instance_segmentation/test_model.py @@ -52,7 +52,7 @@ def test_instance_segmentation_inference(tmpdir): icevision.utils.data_dir.data_dir.mkdir(exist_ok=True, parents=True) data_dir = icedata.pets.load_data() - datamodule = InstanceSegmentationData.from_folders( + datamodule = InstanceSegmentationData.from_icedata( train_folder=data_dir, val_split=0.1, parser=partial(icedata.pets.parser, mask=True), diff --git a/tests/image/keypoint_detection/test_data.py b/tests/image/keypoint_detection/test_data.py new file mode 100644 index 0000000000..1bf9b74ca6 --- /dev/null +++ b/tests/image/keypoint_detection/test_data.py @@ -0,0 +1,49 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest + +from flash.core.data.io.input import DataKeys +from flash.core.utilities.imports import _COCO_AVAILABLE, _IMAGE_AVAILABLE +from flash.image.keypoint_detection import KeypointDetectionData +from tests.image.detection.test_data import _create_synth_files_dataset, _create_synth_folders_dataset + + +@pytest.mark.skipif(not _IMAGE_AVAILABLE, reason="image libraries aren't installed.") +@pytest.mark.skipif(not _COCO_AVAILABLE, reason="pycocotools is not installed for testing") +def test_image_detector_data_from_files(tmpdir): + + predict_files = _create_synth_files_dataset(tmpdir) + datamodule = KeypointDetectionData.from_files(predict_files=predict_files, batch_size=1, image_size=(128, 128)) + data = next(iter(datamodule.predict_dataloader())) + sample = data[0] + assert sample[DataKeys.INPUT].shape == (128, 128, 3) + + +@pytest.mark.skipif(not _IMAGE_AVAILABLE, reason="image libraries aren't installed.") +@pytest.mark.skipif(not _COCO_AVAILABLE, reason="pycocotools is not installed for testing") +def test_image_detector_data_from_folders(tmpdir): + + predict_folder = _create_synth_folders_dataset(tmpdir) + datamodule = KeypointDetectionData.from_folders(predict_folder=predict_folder, batch_size=1, image_size=(128, 128)) + data = next(iter(datamodule.predict_dataloader())) + sample = data[0] + assert sample[DataKeys.INPUT].shape == (128, 128, 3) + + +def test_data_non_supported(): + + assert not KeypointDetectionData.from_tensor + assert not KeypointDetectionData.from_json + assert not KeypointDetectionData.from_csv + assert not KeypointDetectionData.from_datasets