Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Data Pipeline V2: Cleanup #1018

Merged
merged 10 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions flash/graph/classification/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from torch.utils.data import Dataset

from flash.core.data.data_module import DataModule
from flash.core.data.data_pipeline import DataPipelineState
from flash.core.data.io.input import InputFormat
from flash.core.data.io.input_transform import InputTransform
from flash.core.utilities.imports import _GRAPH_AVAILABLE
Expand Down Expand Up @@ -74,11 +75,14 @@ def from_datasets(
predict_transform: Optional[Dict[str, Callable]] = None,
**data_module_kwargs,
) -> "GraphClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
GraphDatasetInput(RunningStage.TRAINING, train_dataset),
GraphDatasetInput(RunningStage.VALIDATING, val_dataset),
GraphDatasetInput(RunningStage.TESTING, test_dataset),
GraphDatasetInput(RunningStage.PREDICTING, predict_dataset),
GraphDatasetInput(RunningStage.TRAINING, train_dataset, **dataset_kwargs),
GraphDatasetInput(RunningStage.VALIDATING, val_dataset, **dataset_kwargs),
GraphDatasetInput(RunningStage.TESTING, test_dataset, **dataset_kwargs),
GraphDatasetInput(RunningStage.PREDICTING, predict_dataset, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand All @@ -94,3 +98,11 @@ def num_features(self):
n_cls_val = getattr(self.val_dataset, "num_features", None)
n_cls_test = getattr(self.test_dataset, "num_features", None)
return n_cls_train or n_cls_val or n_cls_test

from_folders = None
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ethanwharris Do we want to do this for now? Just an idea, but might not be worth it.

# Bind each of these constructor names to None in one chained assignment.
# NOTE(review): presumably this hides inherited DataModule constructors that
# do not apply to graph data — confirm against the base class.
from_files = from_tensors = from_numpy = from_json = from_csv = from_fiftyone = None
107 changes: 59 additions & 48 deletions flash/image/classification/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,14 @@ def from_files(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationFilesInput(RunningStage.TRAINING, train_files, train_targets),
ImageClassificationFilesInput(RunningStage.VALIDATING, val_files, val_targets),
ImageClassificationFilesInput(RunningStage.TESTING, test_files, test_targets),
ImageClassificationFilesInput(RunningStage.PREDICTING, predict_files),
ImageClassificationFilesInput(RunningStage.TRAINING, train_files, train_targets, **dataset_kwargs),
ImageClassificationFilesInput(RunningStage.VALIDATING, val_files, val_targets, **dataset_kwargs),
ImageClassificationFilesInput(RunningStage.TESTING, test_files, test_targets, **dataset_kwargs),
ImageClassificationFilesInput(RunningStage.PREDICTING, predict_files, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand All @@ -261,11 +264,14 @@ def from_folders(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationFolderInput(RunningStage.TRAINING, train_folder),
ImageClassificationFolderInput(RunningStage.VALIDATING, val_folder),
ImageClassificationFolderInput(RunningStage.TESTING, test_folder),
ImageClassificationFolderInput(RunningStage.PREDICTING, predict_folder),
ImageClassificationFolderInput(RunningStage.TRAINING, train_folder, **dataset_kwargs),
ImageClassificationFolderInput(RunningStage.VALIDATING, val_folder, **dataset_kwargs),
ImageClassificationFolderInput(RunningStage.TESTING, test_folder, **dataset_kwargs),
ImageClassificationFolderInput(RunningStage.PREDICTING, predict_folder, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -293,11 +299,14 @@ def from_numpy(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationNumpyInput(RunningStage.TRAINING, train_data, train_targets),
ImageClassificationNumpyInput(RunningStage.VALIDATING, val_data, val_targets),
ImageClassificationNumpyInput(RunningStage.TESTING, test_data, test_targets),
ImageClassificationNumpyInput(RunningStage.PREDICTING, predict_data),
ImageClassificationNumpyInput(RunningStage.TRAINING, train_data, train_targets, **dataset_kwargs),
ImageClassificationNumpyInput(RunningStage.VALIDATING, val_data, val_targets, **dataset_kwargs),
ImageClassificationNumpyInput(RunningStage.TESTING, test_data, test_targets, **dataset_kwargs),
ImageClassificationNumpyInput(RunningStage.PREDICTING, predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -325,11 +334,14 @@ def from_tensors(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationTensorInput(RunningStage.TRAINING, train_data, train_targets),
ImageClassificationTensorInput(RunningStage.VALIDATING, val_data, val_targets),
ImageClassificationTensorInput(RunningStage.TESTING, test_data, test_targets),
ImageClassificationTensorInput(RunningStage.PREDICTING, predict_data),
ImageClassificationTensorInput(RunningStage.TRAINING, train_data, train_targets, **dataset_kwargs),
ImageClassificationTensorInput(RunningStage.VALIDATING, val_data, val_targets, **dataset_kwargs),
ImageClassificationTensorInput(RunningStage.TESTING, test_data, test_targets, **dataset_kwargs),
ImageClassificationTensorInput(RunningStage.PREDICTING, predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -364,23 +376,19 @@ def from_data_frame(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

train_data = (train_data_frame, input_field, target_fields, train_images_root, train_resolver)
val_data = (val_data_frame, input_field, target_fields, val_images_root, val_resolver)
test_data = (test_data_frame, input_field, target_fields, test_images_root, test_resolver)
predict_data = (predict_data_frame, input_field, predict_images_root, predict_resolver)

return cls(
ImageClassificationDataFrameInput(
RunningStage.TRAINING, train_data_frame, input_field, target_fields, train_images_root, train_resolver
),
ImageClassificationCSVInput(
RunningStage.VALIDATING, val_data_frame, input_field, target_fields, val_images_root, val_resolver
),
ImageClassificationCSVInput(
RunningStage.TESTING, test_data_frame, input_field, target_fields, test_images_root, test_resolver
),
ImageClassificationCSVInput(
RunningStage.PREDICTING,
predict_data_frame,
input_field,
root=predict_images_root,
resolver=predict_resolver,
),
ImageClassificationCSVInput(RunningStage.TRAINING, *train_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.VALIDATING, *val_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.TESTING, *test_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.PREDICTING, *predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -415,19 +423,19 @@ def from_csv(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

train_data = (train_file, input_field, target_fields, train_images_root, train_resolver)
val_data = (val_file, input_field, target_fields, val_images_root, val_resolver)
test_data = (test_file, input_field, target_fields, test_images_root, test_resolver)
predict_data = (predict_file, input_field, predict_images_root, predict_resolver)

return cls(
ImageClassificationCSVInput(
RunningStage.TRAINING, train_file, input_field, target_fields, train_images_root, train_resolver
),
ImageClassificationCSVInput(
RunningStage.VALIDATING, val_file, input_field, target_fields, val_images_root, val_resolver
),
ImageClassificationCSVInput(
RunningStage.TESTING, test_file, input_field, target_fields, test_images_root, test_resolver
),
ImageClassificationCSVInput(
RunningStage.PREDICTING, predict_file, input_field, root=predict_images_root, resolver=predict_resolver
),
ImageClassificationCSVInput(RunningStage.TRAINING, *train_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.VALIDATING, *val_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.TESTING, *test_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.PREDICTING, *predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand All @@ -454,11 +462,14 @@ def from_fiftyone(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationFiftyOneInput(RunningStage.TRAINING, train_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.VALIDATING, val_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.TESTING, test_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.PREDICTING, predict_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.TRAINING, train_dataset, label_field, **dataset_kwargs),
ImageClassificationFiftyOneInput(RunningStage.VALIDATING, val_dataset, label_field, **dataset_kwargs),
ImageClassificationFiftyOneInput(RunningStage.TESTING, test_dataset, label_field, **dataset_kwargs),
ImageClassificationFiftyOneInput(RunningStage.PREDICTING, predict_dataset, label_field, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down
102 changes: 92 additions & 10 deletions flash/image/detection/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
from typing import Any, Callable, Dict, Hashable, Optional, Sequence, Tuple, Type, TYPE_CHECKING, Union
from typing import Any, Callable, Dict, Hashable, List, Optional, Sequence, Tuple, Type, TYPE_CHECKING, Union

from flash.core.data.data_module import DataModule
from flash.core.data.data_pipeline import DataPipelineState
from flash.core.data.io.input import DataKeys, InputFormat
from flash.core.data.io.input_transform import InputTransform
from flash.core.integrations.fiftyone.utils import FiftyOneLabelUtilities
Expand Down Expand Up @@ -151,11 +152,12 @@ def __init__(
"coco": partial(IceVisionInput, parser=COCOBBoxParser),
"via": partial(IceVisionInput, parser=VIABBoxParser),
"voc": partial(IceVisionInput, parser=VOCBBoxParser),
"icedata": partial(IceVisionInput, parser=parser),
InputFormat.FILES: IceVisionInput,
InputFormat.FOLDERS: partial(IceVisionInput, parser=parser),
InputFormat.FIFTYONE: ObjectDetectionFiftyOneInput,
},
default_input=InputFormat.FILES,
default_input="icedata",
)

self._default_collate = self._identity
Expand All @@ -179,7 +181,7 @@ class ObjectDetectionData(DataModule):
input_transform_cls = ObjectDetectionInputTransform

@classmethod
def from_folders(
def from_icedata(
cls,
train_folder: Optional[str] = None,
train_ann_file: Optional[str] = None,
Expand All @@ -196,11 +198,14 @@ def from_folders(
parser: Optional[Union[Callable, Type[Parser]]] = None,
**data_module_kwargs,
) -> "ObjectDetectionData":

dataset_kwargs = dict(parser=parser, data_pipeline_state=DataPipelineState())

return cls(
IceVisionInput(RunningStage.TRAINING, train_folder, train_ann_file, parser=parser),
IceVisionInput(RunningStage.VALIDATING, val_folder, val_ann_file, parser=parser),
IceVisionInput(RunningStage.TESTING, test_folder, test_ann_file, parser=parser),
IceVisionInput(RunningStage.PREDICTING, predict_folder, parser=parser),
IceVisionInput(RunningStage.TRAINING, train_folder, train_ann_file, **dataset_kwargs),
IceVisionInput(RunningStage.VALIDATING, val_folder, val_ann_file, **dataset_kwargs),
IceVisionInput(RunningStage.TESTING, test_folder, test_ann_file, **dataset_kwargs),
IceVisionInput(RunningStage.PREDICTING, predict_folder, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -249,7 +254,7 @@ def from_coco(
:class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms.
image_size: The size to resize images (and their bounding boxes) to.
"""
return cls.from_folders(
return cls.from_icedata(
train_folder=train_folder,
train_ann_file=train_ann_file,
val_folder=val_folder,
Expand Down Expand Up @@ -304,7 +309,7 @@ def from_voc(
:class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms.
image_size: The size to resize images (and their bounding boxes) to.
"""
return cls.from_folders(
return cls.from_icedata(
train_folder=train_folder,
train_ann_file=train_ann_file,
val_folder=val_folder,
Expand Down Expand Up @@ -359,7 +364,7 @@ def from_via(
:class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms.
image_size: The size to resize images (and their bounding boxes) to.
"""
return cls.from_folders(
return cls.from_icedata(
train_folder=train_folder,
train_ann_file=train_ann_file,
val_folder=val_folder,
Expand Down Expand Up @@ -407,3 +412,80 @@ def from_fiftyone(
),
**data_module_kwargs,
)

@classmethod
def from_folders(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ethanwharris Not sure about those.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like it 😃

cls,
predict_folder: Optional[str] = None,
predict_transform: Optional[Dict[str, Callable]] = None,
image_size: Tuple[int, int] = (128, 128),
**data_module_kwargs: Any,
) -> "DataModule":
"""Creates a :class:`~flash.core.data.data_module.DataModule` object from the given folders using the
:class:`~flash.core.data.io.input.Input` of name
:attr:`~flash.core.data.io.input.InputFormat.FOLDERS`
from the passed or constructed :class:`~flash.core.data.io.input_transform.InputTransform`.
tchaton marked this conversation as resolved.
Show resolved Hide resolved

Args:
predict_folder: The folder containing the predict data.
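predict_transform: The dictionary of transforms to use during predicting which maps
:class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms.
image_size: The size to resize images (and their bounding boxes) to.
data_module_kwargs: The keyword arguments for creating the datamodule.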

Returns:
The constructed data module.
"""
return cls(
None,
None,
None,
IceVisionInput(RunningStage.PREDICTING, predict_folder),
input_transform=cls.input_transform_cls(
None,
None,
None,
predict_transform,
tchaton marked this conversation as resolved.
Show resolved Hide resolved
image_size=image_size,
),
**data_module_kwargs,
)

@classmethod
def from_files(
    cls,
    predict_files: Optional[List[str]] = None,
    predict_transform: Optional[Dict[str, Callable]] = None,
    image_size: Tuple[int, int] = (128, 128),
    **data_module_kwargs: Any,
) -> "DataModule":
    """Creates a :class:`~flash.core.data.data_module.DataModule` object from the given files using the
    :class:`~flash.core.data.io.input.Input` of name
    :attr:`~flash.core.data.io.input.InputFormat.FILES`
    from the passed or constructed :class:`~flash.core.data.io.input_transform.InputTransform`.

    Only a predict input is built here; the train, validation and test inputs are passed as ``None``.

    Args:
        predict_files: The list of files containing the predict data.
        predict_transform: The dictionary of transforms to use during predicting which maps
            :class:`~flash.core.data.io.input_transform.InputTransform` hook names to callable transforms.
        image_size: The size to resize images (and their bounding boxes) to.
        data_module_kwargs: The keyword arguments for creating the datamodule.

    Returns:
        The constructed data module.
    """
    return cls(
        # Positional slots are (train, val, test, predict); only predict is populated.
        None,
        None,
        None,
        IceVisionInput(RunningStage.PREDICTING, predict_files),
        input_transform=cls.input_transform_cls(
            # Same (train, val, test, predict) ordering for the transforms.
            None,
            None,
            None,
            predict_transform,
            image_size=image_size,
        ),
        **data_module_kwargs,
    )

# Constructor names bound to None on this class.
# NOTE(review): presumably these hide inherited DataModule constructors that
# do not apply to object detection — confirm against the base class.
# The other data modules in this changeset spell the tensor constructor
# ``from_tensors`` (plural), so that name is bound here as well; the singular
# ``from_tensor`` binding is kept so existing attribute lookups still resolve.
from_tensor = None
from_tensors = None
from_json = None
from_csv = None
from_datasets = None
Loading