From 595f2660d2793fd8618d7b8d72be693ec18888ab Mon Sep 17 00:00:00 2001 From: KonstantinKorotaev Date: Mon, 23 Aug 2021 18:00:50 +0300 Subject: [PATCH] Fixing DeepSource analysis issues --- flash/core/data/data_module.py | 5 ++--- flash/core/data/data_source.py | 30 ++++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/flash/core/data/data_module.py b/flash/core/data/data_module.py index c0ea53dd98..3f43e1241d 100644 --- a/flash/core/data/data_module.py +++ b/flash/core/data/data_module.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import json import os import platform from typing import ( @@ -35,7 +34,7 @@ from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.utilities.exceptions import MisconfigurationException from torch.utils.data import DataLoader, Dataset -from torch.utils.data.dataset import IterableDataset, random_split, Subset +from torch.utils.data.dataset import IterableDataset, Subset from torch.utils.data.sampler import Sampler import flash @@ -43,7 +42,7 @@ from flash.core.data.base_viz import BaseVisualization from flash.core.data.callback import BaseDataFetcher from flash.core.data.data_pipeline import DataPipeline, DefaultPreprocess, Postprocess, Preprocess -from flash.core.data.data_source import DataSource, DefaultDataSources, LabelStudioDataSource +from flash.core.data.data_source import DataSource, DefaultDataSources from flash.core.data.splits import SplitDataset from flash.core.data.utils import _STAGES_PREFIX from flash.core.utilities.imports import _FIFTYONE_AVAILABLE, requires diff --git a/flash/core/data/data_source.py b/flash/core/data/data_source.py index 085e9510cb..fa786bb9e2 100644 --- a/flash/core/data/data_source.py +++ b/flash/core/data/data_source.py @@ -49,7 +49,9 @@ from flash.core.data.auto_dataset import 
AutoDataset, BaseAutoDataset, IterableAutoDataset from flash.core.data.properties import ProcessState, Properties from flash.core.data.utils import CurrentRunningStageFuncContext -from flash.core.utilities.imports import _FIFTYONE_AVAILABLE, lazy_import, requires +from flash.core.utilities.imports import _FIFTYONE_AVAILABLE, lazy_import, requires, \ + _PYTORCHVIDEO_AVAILABLE, _TEXT_AVAILABLE +from copy import deepcopy SampleCollection = None if _FIFTYONE_AVAILABLE: @@ -58,13 +60,11 @@ from fiftyone.core.collections import SampleCollection else: fol = None -from copy import deepcopy - -from flash.core.utilities.imports import _PYTORCHVIDEO_AVAILABLE, _TEXT_AVAILABLE if _PYTORCHVIDEO_AVAILABLE: from torchvision.datasets.folder import default_loader + # Credit to the PyTorchVision Team: # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py#L10 def has_file_allowed_extension(filename: str, extensions: Tuple[str, ...]) -> bool: @@ -825,9 +825,11 @@ def _get_labels_from_sample(self, labels): class LabelStudioImageDataSource(LabelStudioDataSource): + """The ``LabelStudioImageDataSource`` expects the input to + :meth:`~flash.core.data.data_source.DataSource.load_data` to be a json export from label studio. + Export data should point to image files""" def __init__(self): super().__init__() - pass def load_sample(self, sample: Mapping[str, Any] = None, dataset: Optional[Any] = None) -> Any: """Load 1 sample from dataset.""" @@ -843,6 +845,10 @@ def load_sample(self, sample: Mapping[str, Any] = None, dataset: Optional[Any] = class LabelStudioTextDataSource(LabelStudioDataSource): + """The ``LabelStudioTextDataSource`` expects the input to + :meth:`~flash.core.data.data_source.DataSource.load_data` to be a json export from label studio. 
+ Export data should point to text data + """ def __init__(self, backbone=None, max_length=128): super().__init__() if backbone: @@ -868,8 +874,11 @@ def load_sample(self, sample: Mapping[str, Any] = None, dataset: Optional[Any] = class LabelStudioVideoDataSource(LabelStudioDataSource): + """The ``LabelStudioVideoDataSource`` expects the input to + :meth:`~flash.core.data.data_source.DataSource.load_data` to be a json export from label studio. + Export data should point to video files""" def __init__( - self, video_sampler=None, clip_sampler=None, clip_duration=1, decode_audio=False, decoder: str = "pyav" + self, video_sampler=None, clip_sampler=None, decode_audio=False, decoder: str = "pyav" ): super().__init__() self.video_sampler = video_sampler or torch.utils.data.RandomSampler @@ -882,12 +891,18 @@ def load_sample(self, sample: Mapping[str, Any] = None, dataset: Optional[Any] = return sample def load_data(self, data: Optional[Any] = None, dataset: Optional[Any] = None) -> Sequence[Mapping[str, Any]]: + """ + Load the data via the parent class, then pass results, test_results and val_results through convert_to_encodedvideo + """ super().load_data(data, dataset) self.results = self.convert_to_encodedvideo(self.results) self.test_results = self.convert_to_encodedvideo(self.test_results) self.val_results = self.convert_to_encodedvideo(self.val_results) def convert_to_encodedvideo(self, dataset): + """ + Converting dataset to EncodedVideoDataset + """ if len(dataset) > 0: from pytorchvideo.data import EncodedVideoDataset @@ -904,5 +919,4 @@ def convert_to_encodedvideo(self, dataset): decoder=self.decoder, ) return dataset - else: - return [] + return []