Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Data Pipeline V2: Cleanup #1018

Merged
merged 10 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Added support for comma delimited multi-label targets to the `ImageClassifier` ([#997](https://github.com/PyTorchLightning/lightning-flash/pull/997))

- Added `datapipeline_state` on dataset creation within the `from_*` methods from the `DataModule` ([#1018](https://github.com/PyTorchLightning/lightning-flash/pull/1018))

### Changed

- Changed `DataSource` to `Input` ([#929](https://github.com/PyTorchLightning/lightning-flash/pull/929))
Expand All @@ -34,6 +36,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Added `Output` suffix to `Preds`, `FiftyOneDetectionLabels`, `SegmentationLabels`, `DetectionLabels`, `Classes`, `FiftyOneLabels`, `Labels`, `Logits`, `Probabilities` ([#1011](https://github.com/PyTorchLightning/lightning-flash/pull/1011))


- Changed `from_files` and `from_folders` in `ObjectDetectionData`, `InstanceSegmentationData`, `KeypointDetectionData` to support only the `predicting` stage ([#1018](https://github.com/PyTorchLightning/lightning-flash/pull/1018))

### Deprecated

- Deprecated `flash.core.data.process.Serializer` in favour of `flash.core.data.io.output.Output` ([#927](https://github.com/PyTorchLightning/lightning-flash/pull/927))
Expand Down
2 changes: 1 addition & 1 deletion flash/core/integrations/icevision/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def load_data(
def predict_load_data(
self, paths: Union[str, List[str]], ann_file: Optional[str] = None, parser: Optional[Type["Parser"]] = None
) -> List[Dict[str, Any]]:
if parser is not None:
if parser is not None and parser != Parser:
return self.load_data(paths, ann_file, parser)
paths = list_valid_files(paths, valid_extensions=IMG_EXTENSIONS + NP_EXTENSIONS)
return [{DataKeys.INPUT: path} for path in paths]
Expand Down
4 changes: 3 additions & 1 deletion flash/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,9 @@ def predict(
# <hack> Temporary fix to support new `Input` object
input = data_pipeline._input_transform_pipeline.input_of_name(input or "default")

if inspect.isclass(input) and issubclass(input, NewInputBase):
if (inspect.isclass(input) and issubclass(input, NewInputBase)) or (
isinstance(input, functools.partial) and issubclass(input.func, NewInputBase)
):
dataset = input(running_stage, x, data_pipeline_state=self._data_pipeline_state)
else:
dataset = input.generate_dataset(x, running_stage)
Expand Down
3 changes: 2 additions & 1 deletion flash/core/utilities/flash_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,8 @@ def add_arguments_to_parser(self, parser) -> None:
)
or (not hasattr(DataModule, function) and not self.legacy)
):
self.add_subcommand_from_function(subcommands, getattr(self.local_datamodule_class, function))
if getattr(self.local_datamodule_class, function) is not None:
self.add_subcommand_from_function(subcommands, getattr(self.local_datamodule_class, function))

for datamodule_builder in self.additional_datamodule_builders:
self.add_subcommand_from_function(subcommands, datamodule_builder)
Expand Down
20 changes: 16 additions & 4 deletions flash/graph/classification/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from torch.utils.data import Dataset

from flash.core.data.data_module import DataModule
from flash.core.data.data_pipeline import DataPipelineState
from flash.core.data.io.input import InputFormat
from flash.core.data.io.input_transform import InputTransform
from flash.core.utilities.imports import _GRAPH_AVAILABLE
Expand Down Expand Up @@ -74,11 +75,14 @@ def from_datasets(
predict_transform: Optional[Dict[str, Callable]] = None,
**data_module_kwargs,
) -> "GraphClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
GraphDatasetInput(RunningStage.TRAINING, train_dataset),
GraphDatasetInput(RunningStage.VALIDATING, val_dataset),
GraphDatasetInput(RunningStage.TESTING, test_dataset),
GraphDatasetInput(RunningStage.PREDICTING, predict_dataset),
GraphDatasetInput(RunningStage.TRAINING, train_dataset, **dataset_kwargs),
GraphDatasetInput(RunningStage.VALIDATING, val_dataset, **dataset_kwargs),
GraphDatasetInput(RunningStage.TESTING, test_dataset, **dataset_kwargs),
GraphDatasetInput(RunningStage.PREDICTING, predict_dataset, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand All @@ -94,3 +98,11 @@ def num_features(self):
n_cls_val = getattr(self.val_dataset, "num_features", None)
n_cls_test = getattr(self.test_dataset, "num_features", None)
return n_cls_train or n_cls_val or n_cls_test

from_folders = None
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ethanwharris Do we want to do this for now? Just an idea, but might not be worth it.

from_files = None
from_tensors = None
from_numpy = None
from_json = None
from_csv = None
from_fiftyone = None
107 changes: 59 additions & 48 deletions flash/image/classification/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,14 @@ def from_files(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationFilesInput(RunningStage.TRAINING, train_files, train_targets),
ImageClassificationFilesInput(RunningStage.VALIDATING, val_files, val_targets),
ImageClassificationFilesInput(RunningStage.TESTING, test_files, test_targets),
ImageClassificationFilesInput(RunningStage.PREDICTING, predict_files),
ImageClassificationFilesInput(RunningStage.TRAINING, train_files, train_targets, **dataset_kwargs),
ImageClassificationFilesInput(RunningStage.VALIDATING, val_files, val_targets, **dataset_kwargs),
ImageClassificationFilesInput(RunningStage.TESTING, test_files, test_targets, **dataset_kwargs),
ImageClassificationFilesInput(RunningStage.PREDICTING, predict_files, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand All @@ -259,11 +262,14 @@ def from_folders(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationFolderInput(RunningStage.TRAINING, train_folder),
ImageClassificationFolderInput(RunningStage.VALIDATING, val_folder),
ImageClassificationFolderInput(RunningStage.TESTING, test_folder),
ImageClassificationFolderInput(RunningStage.PREDICTING, predict_folder),
ImageClassificationFolderInput(RunningStage.TRAINING, train_folder, **dataset_kwargs),
ImageClassificationFolderInput(RunningStage.VALIDATING, val_folder, **dataset_kwargs),
ImageClassificationFolderInput(RunningStage.TESTING, test_folder, **dataset_kwargs),
ImageClassificationFolderInput(RunningStage.PREDICTING, predict_folder, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -291,11 +297,14 @@ def from_numpy(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationNumpyInput(RunningStage.TRAINING, train_data, train_targets),
ImageClassificationNumpyInput(RunningStage.VALIDATING, val_data, val_targets),
ImageClassificationNumpyInput(RunningStage.TESTING, test_data, test_targets),
ImageClassificationNumpyInput(RunningStage.PREDICTING, predict_data),
ImageClassificationNumpyInput(RunningStage.TRAINING, train_data, train_targets, **dataset_kwargs),
ImageClassificationNumpyInput(RunningStage.VALIDATING, val_data, val_targets, **dataset_kwargs),
ImageClassificationNumpyInput(RunningStage.TESTING, test_data, test_targets, **dataset_kwargs),
ImageClassificationNumpyInput(RunningStage.PREDICTING, predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -323,11 +332,14 @@ def from_tensors(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationTensorInput(RunningStage.TRAINING, train_data, train_targets),
ImageClassificationTensorInput(RunningStage.VALIDATING, val_data, val_targets),
ImageClassificationTensorInput(RunningStage.TESTING, test_data, test_targets),
ImageClassificationTensorInput(RunningStage.PREDICTING, predict_data),
ImageClassificationTensorInput(RunningStage.TRAINING, train_data, train_targets, **dataset_kwargs),
ImageClassificationTensorInput(RunningStage.VALIDATING, val_data, val_targets, **dataset_kwargs),
ImageClassificationTensorInput(RunningStage.TESTING, test_data, test_targets, **dataset_kwargs),
ImageClassificationTensorInput(RunningStage.PREDICTING, predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -362,23 +374,19 @@ def from_data_frame(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

train_data = (train_data_frame, input_field, target_fields, train_images_root, train_resolver)
val_data = (val_data_frame, input_field, target_fields, val_images_root, val_resolver)
test_data = (test_data_frame, input_field, target_fields, test_images_root, test_resolver)
predict_data = (predict_data_frame, input_field, predict_images_root, predict_resolver)

return cls(
ImageClassificationDataFrameInput(
RunningStage.TRAINING, train_data_frame, input_field, target_fields, train_images_root, train_resolver
),
ImageClassificationCSVInput(
RunningStage.VALIDATING, val_data_frame, input_field, target_fields, val_images_root, val_resolver
),
ImageClassificationCSVInput(
RunningStage.TESTING, test_data_frame, input_field, target_fields, test_images_root, test_resolver
),
ImageClassificationCSVInput(
RunningStage.PREDICTING,
predict_data_frame,
input_field,
root=predict_images_root,
resolver=predict_resolver,
),
ImageClassificationCSVInput(RunningStage.TRAINING, *train_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.VALIDATING, *val_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.TESTING, *test_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.PREDICTING, *predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down Expand Up @@ -413,19 +421,19 @@ def from_csv(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs: Any,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

train_data = (train_file, input_field, target_fields, train_images_root, train_resolver)
val_data = (val_file, input_field, target_fields, val_images_root, val_resolver)
test_data = (test_file, input_field, target_fields, test_images_root, test_resolver)
predict_data = (predict_file, input_field, predict_images_root, predict_resolver)

return cls(
ImageClassificationCSVInput(
RunningStage.TRAINING, train_file, input_field, target_fields, train_images_root, train_resolver
),
ImageClassificationCSVInput(
RunningStage.VALIDATING, val_file, input_field, target_fields, val_images_root, val_resolver
),
ImageClassificationCSVInput(
RunningStage.TESTING, test_file, input_field, target_fields, test_images_root, test_resolver
),
ImageClassificationCSVInput(
RunningStage.PREDICTING, predict_file, input_field, root=predict_images_root, resolver=predict_resolver
),
ImageClassificationCSVInput(RunningStage.TRAINING, *train_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.VALIDATING, *val_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.TESTING, *test_data, **dataset_kwargs),
ImageClassificationCSVInput(RunningStage.PREDICTING, *predict_data, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand All @@ -452,11 +460,14 @@ def from_fiftyone(
image_size: Tuple[int, int] = (196, 196),
**data_module_kwargs,
) -> "ImageClassificationData":

dataset_kwargs = dict(data_pipeline_state=DataPipelineState())

return cls(
ImageClassificationFiftyOneInput(RunningStage.TRAINING, train_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.VALIDATING, val_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.TESTING, test_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.PREDICTING, predict_dataset, label_field),
ImageClassificationFiftyOneInput(RunningStage.TRAINING, train_dataset, label_field, **dataset_kwargs),
ImageClassificationFiftyOneInput(RunningStage.VALIDATING, val_dataset, label_field, **dataset_kwargs),
ImageClassificationFiftyOneInput(RunningStage.TESTING, test_dataset, label_field, **dataset_kwargs),
ImageClassificationFiftyOneInput(RunningStage.PREDICTING, predict_dataset, label_field, **dataset_kwargs),
input_transform=cls.input_transform_cls(
train_transform,
val_transform,
Expand Down
Loading