diff --git a/README.md b/README.md index 6c60b5ceb0..76273ac058 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ download_data("https://pl-flash-data.s3.amazonaws.com/hymenoptera_data.zip", 'da # 2. Load the data datamodule = ImageClassificationData.from_folders( train_folder="data/hymenoptera_data/train/", - valid_folder="data/hymenoptera_data/val/", + val_folder="data/hymenoptera_data/val/", test_folder="data/hymenoptera_data/test/", ) @@ -205,11 +205,11 @@ download_data("https://pl-flash-data.s3.amazonaws.com/xsum.zip", 'data/') # 2. Load the data datamodule = SummarizationData.from_files( - train_file="data/xsum/train.csv", - valid_file="data/xsum/valid.csv", - test_file="data/xsum/test.csv", - input="input", - target="target" + train_file="data/xsum/train.csv", + val_file="data/xsum/valid.csv", + test_file="data/xsum/test.csv", + input="input", + target="target" ) # 3. Build the model diff --git a/flash/data/data_module.py b/flash/data/data_module.py index 2db64e457b..641eff21d7 100644 --- a/flash/data/data_module.py +++ b/flash/data/data_module.py @@ -32,7 +32,7 @@ class DataModule(pl.LightningDataModule): Args: train_dataset: Dataset for training. Defaults to None. - valid_dataset: Dataset for validating model performance during training. Defaults to None. + val_dataset: Dataset for validating model performance during training. Defaults to None. test_dataset: Dataset to test model performance. Defaults to None. predict_dataset: Dataset to predict model performance. Defaults to None. num_workers: The number of workers to use for parallelized loading. Defaults to None. @@ -49,7 +49,7 @@ class DataModule(pl.LightningDataModule): def __init__( self, train_dataset: Optional[Dataset] = None, - valid_dataset: Optional[Dataset] = None, + val_dataset: Optional[Dataset] = None, test_dataset: Optional[Dataset] = None, predict_dataset: Optional[Dataset] = None, batch_size: int = 1, @@ -58,14 +58,14 @@ def __init__( super().__init__() self._train_ds = train_dataset - self._valid_ds = valid_dataset + self._val_ds = val_dataset self._test_ds = test_dataset self._predict_ds = predict_dataset if self._train_ds: self.train_dataloader = self._train_dataloader - if self._valid_ds: + if self._val_ds: self.val_dataloader = self._val_dataloader if self._test_ds: @@ -104,8 +104,8 @@ def set_running_stages(self): if self._train_ds: self.set_dataset_attribute(self._train_ds, 'running_stage', RunningStage.TRAINING) - if self._valid_ds: - self.set_dataset_attribute(self._valid_ds, 'running_stage', RunningStage.VALIDATING) + if self._val_ds: + self.set_dataset_attribute(self._val_ds, 'running_stage', RunningStage.VALIDATING) if self._test_ds: self.set_dataset_attribute(self._test_ds, 'running_stage', RunningStage.TESTING) @@ -130,13 +130,13 @@ def _train_dataloader(self) -> DataLoader: ) def _val_dataloader(self) -> DataLoader: - valid_ds: Dataset = self._valid_ds() if isinstance(self._valid_ds, Callable) else self._valid_ds + val_ds: Dataset = self._val_ds() if isinstance(self._val_ds, Callable) else self._val_ds return DataLoader( - valid_ds, + val_ds, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True, - collate_fn=self._resolve_collate_fn(valid_ds, RunningStage.VALIDATING) + collate_fn=self._resolve_collate_fn(val_ds, RunningStage.VALIDATING) ) def _test_dataloader(self) -> DataLoader: @@ -214,10 +214,10 @@ def autogenerate_dataset( return AutoDataset(data, whole_data_load_fn, per_sample_load_fn, data_pipeline, running_stage=running_stage) @staticmethod - def 
train_valid_test_split( + def train_val_test_split( dataset: torch.utils.data.Dataset, train_split: Optional[Union[float, int]] = None, - valid_split: Optional[Union[float, int]] = None, + val_split: Optional[Union[float, int]] = None, test_split: Optional[Union[float, int]] = None, seed: Optional[int] = 1234, ) -> Tuple[Dataset, Optional[Dataset], Optional[Dataset]]: @@ -227,11 +227,11 @@ def train_valid_test_split( dataset: Dataset to be split. train_split: If Float, ratio of data to be contained within the train dataset. If Int, number of samples to be contained within train dataset. - valid_split: If Float, ratio of data to be contained within the validation dataset. If Int, + val_split: If Float, ratio of data to be contained within the validation dataset. If Int, number of samples to be contained within test dataset. test_split: If Float, ratio of data to be contained within the test dataset. If Int, number of samples to be contained within test dataset. - seed: Used for the train/val splits when valid_split is not None. + seed: Used for the train/val splits when val_split is not None. """ n = len(dataset) @@ -243,12 +243,12 @@ def train_valid_test_split( else: _test_length = test_split - if valid_split is None: + if val_split is None: _val_length = 0 - elif isinstance(valid_split, float): - _val_length = int(n * valid_split) + elif isinstance(val_split, float): + _val_length = int(n * val_split) else: - _val_length = valid_split + _val_length = val_split if train_split is None: _train_length = n - _val_length - _test_length @@ -265,7 +265,7 @@ def train_valid_test_split( train_ds, val_ds, test_ds = torch.utils.data.random_split( dataset, [_train_length, _val_length, _test_length], generator ) - if valid_split is None: + if val_split is None: val_ds = None if test_split is None: test_ds = None @@ -293,7 +293,7 @@ def _generate_dataset_if_possible( def from_load_data_inputs( cls, train_load_data_input: Optional[Any] = None, - valid_load_data_input: Optional[Any] = None, + val_load_data_input: Optional[Any] = None, test_load_data_input: Optional[Any] = None, predict_load_data_input: Optional[Any] = None, preprocess: Optional[Preprocess] = None, @@ -306,7 +306,7 @@ def from_load_data_inputs( Args: cls: ``DataModule`` subclass train_load_data_input: Data to be received by the ``train_load_data`` function from this ``Preprocess`` - valid_load_data_input: Data to be received by the ``val_load_data`` function from this ``Preprocess`` + val_load_data_input: Data to be received by the ``val_load_data`` function from this ``Preprocess`` test_load_data_input: Data to be received by the ``test_load_data`` function from this ``Preprocess`` predict_load_data_input: Data to be received by the ``predict_load_data`` function from this ``Preprocess`` kwargs: Any extra arguments to instantiate the provided ``DataModule`` @@ -322,8 +322,8 @@ def from_load_data_inputs( train_dataset = cls._generate_dataset_if_possible( train_load_data_input, running_stage=RunningStage.TRAINING, data_pipeline=data_pipeline ) - valid_dataset = cls._generate_dataset_if_possible( - valid_load_data_input, running_stage=RunningStage.VALIDATING, data_pipeline=data_pipeline + val_dataset = cls._generate_dataset_if_possible( + val_load_data_input, running_stage=RunningStage.VALIDATING, data_pipeline=data_pipeline ) test_dataset = cls._generate_dataset_if_possible( test_load_data_input, running_stage=RunningStage.TESTING, data_pipeline=data_pipeline @@ -333,7 +333,7 @@ def from_load_data_inputs( ) datamodule = cls( 
train_dataset=train_dataset, - valid_dataset=valid_dataset, + val_dataset=val_dataset, test_dataset=test_dataset, predict_dataset=predict_dataset, **kwargs diff --git a/flash/data/process.py b/flash/data/process.py index fbeb531fca..f61220dc11 100644 --- a/flash/data/process.py +++ b/flash/data/process.py @@ -87,13 +87,13 @@ class Preprocess(Properties, torch.nn.Module): def __init__( self, train_transform: Optional[Union[Callable, Module, Dict[str, Callable]]] = None, - valid_transform: Optional[Union[Callable, Module, Dict[str, Callable]]] = None, + val_transform: Optional[Union[Callable, Module, Dict[str, Callable]]] = None, test_transform: Optional[Union[Callable, Module, Dict[str, Callable]]] = None, predict_transform: Optional[Union[Callable, Module, Dict[str, Callable]]] = None, ): super().__init__() self.train_transform = convert_to_modules(train_transform) - self.valid_transform = convert_to_modules(valid_transform) + self.val_transform = convert_to_modules(val_transform) self.test_transform = convert_to_modules(test_transform) self.predict_transform = convert_to_modules(predict_transform) diff --git a/flash/tabular/classification/data/data.py b/flash/tabular/classification/data/data.py index 6909a29d88..fa62d4e6ca 100644 --- a/flash/tabular/classification/data/data.py +++ b/flash/tabular/classification/data/data.py @@ -83,7 +83,7 @@ def state(self) -> TabularState: @staticmethod def generate_state( train_df: DataFrame, - valid_df: Optional[DataFrame], + val_df: Optional[DataFrame], test_df: Optional[DataFrame], predict_df: Optional[DataFrame], target_col: str, @@ -100,8 +100,8 @@ def generate_state( dfs = [train_df] - if valid_df is not None: - dfs += [valid_df] + if val_df is not None: + dfs += [val_df] if test_df is not None: dfs += [test_df] @@ -197,7 +197,7 @@ def from_csv( train_csv: Optional[str] = None, categorical_cols: Optional[List] = None, numerical_cols: Optional[List] = None, - valid_csv: Optional[str] = None, + val_csv: Optional[str] = None, test_csv: Optional[str] = None, predict_csv: Optional[str] = None, batch_size: int = 8, @@ -215,7 +215,7 @@ def from_csv( target_col: The column containing the class id. categorical_cols: The list of categorical columns. numerical_cols: The list of numerical columns. - valid_csv: Validation data csv file. + val_csv: Validation data csv file. test_csv: Test data csv file. batch_size: The batchsize to use for parallel loading. Defaults to 64. num_workers: The number of workers to use for parallelized loading. 
@@ -234,7 +234,7 @@ def from_csv( text_data = TabularData.from_files("train.csv", label_field="class", text_field="sentence") """ train_df = pd.read_csv(train_csv, **pandas_kwargs) - valid_df = pd.read_csv(valid_csv, **pandas_kwargs) if valid_csv else None + val_df = pd.read_csv(val_csv, **pandas_kwargs) if val_csv else None test_df = pd.read_csv(test_csv, **pandas_kwargs) if test_csv else None predict_df = pd.read_csv(predict_csv, **pandas_kwargs) if predict_csv else None @@ -243,7 +243,7 @@ def from_csv( target_col, categorical_cols, numerical_cols, - valid_df, + val_df, test_df, predict_df, batch_size, @@ -268,21 +268,21 @@ def emb_sizes(self) -> list: @staticmethod def _split_dataframe( train_df: DataFrame, - valid_df: Optional[DataFrame] = None, + val_df: Optional[DataFrame] = None, test_df: Optional[DataFrame] = None, val_size: float = None, test_size: float = None, ): - if valid_df is None and isinstance(val_size, float) and isinstance(test_size, float): + if val_df is None and isinstance(val_size, float) and isinstance(test_size, float): assert 0 < val_size < 1 assert 0 < test_size < 1 - train_df, valid_df = train_test_split(train_df, test_size=(val_size + test_size)) + train_df, val_df = train_test_split(train_df, test_size=(val_size + test_size)) if test_df is None and isinstance(test_size, float): assert 0 < test_size < 1 - valid_df, test_df = train_test_split(valid_df, test_size=test_size) + val_df, test_df = train_test_split(val_df, test_size=test_size) - return train_df, valid_df, test_df + return train_df, val_df, test_df @staticmethod def _sanetize_cols(cat_cols: Optional[List], num_cols: Optional[List]): @@ -298,7 +298,7 @@ def from_df( target_col: str, categorical_cols: Optional[List] = None, numerical_cols: Optional[List] = None, - valid_df: Optional[DataFrame] = None, + val_df: Optional[DataFrame] = None, test_df: Optional[DataFrame] = None, predict_df: Optional[DataFrame] = None, batch_size: int = 8, @@ -316,7 +316,7 @@ def from_df( target_col: The column containing the class id. categorical_cols: The list of categorical columns. numerical_cols: The list of numerical columns. - valid_df: Validation data DataFrame. + val_df: Validation data DataFrame. test_df: Test data DataFrame. batch_size: The batchsize to use for parallel loading. Defaults to 64. num_workers: The number of workers to use for parallelized loading. 
@@ -334,13 +334,13 @@ def from_df( """ categorical_cols, numerical_cols = cls._sanetize_cols(categorical_cols, numerical_cols) - train_df, valid_df, test_df = cls._split_dataframe(train_df, valid_df, test_df, val_size, test_size) + train_df, val_df, test_df = cls._split_dataframe(train_df, val_df, test_df, val_size, test_size) preprocess_cls = preprocess_cls or cls.preprocess_cls preprocess_state = preprocess_cls.generate_state( train_df, - valid_df, + val_df, test_df, predict_df, target_col, @@ -353,7 +353,7 @@ def from_df( return cls.from_load_data_inputs( train_load_data_input=train_df, - valid_load_data_input=valid_df, + val_load_data_input=val_df, test_load_data_input=test_df, predict_load_data_input=predict_df, batch_size=batch_size, diff --git a/flash/text/classification/data.py b/flash/text/classification/data.py index 4b24f82424..b1ac5b8990 100644 --- a/flash/text/classification/data.py +++ b/flash/text/classification/data.py @@ -237,7 +237,7 @@ def from_files( target: Optional[str] = 'labels', filetype: str = "csv", backbone: str = "prajjwal1/bert-tiny", - valid_file: Optional[str] = None, + val_file: Optional[str] = None, test_file: Optional[str] = None, predict_file: Optional[str] = None, max_length: int = 128, @@ -255,7 +255,7 @@ def from_files( target: The field storing the class id of the associated text. filetype: .csv or .json backbone: Tokenizer backbone to use, can use any HuggingFace tokenizer. - valid_file: Path to validation data. + val_file: Path to validation data. test_file: Path to test data. batch_size: the batchsize to use for parallel loading. Defaults to 64. num_workers: The number of workers to use for parallelized loading. @@ -287,7 +287,7 @@ def from_files( return cls.from_load_data_inputs( train_load_data_input=train_file, - valid_load_data_input=valid_file, + val_load_data_input=val_file, test_load_data_input=test_file, predict_load_data_input=predict_file, batch_size=batch_size, @@ -327,7 +327,7 @@ def from_file( target=None, filetype=filetype, backbone=backbone, - valid_file=None, + val_file=None, test_file=None, predict_file=predict_file, max_length=max_length, diff --git a/flash/text/seq2seq/core/data.py b/flash/text/seq2seq/core/data.py index 2d9b9e98d6..9757c79516 100644 --- a/flash/text/seq2seq/core/data.py +++ b/flash/text/seq2seq/core/data.py @@ -168,7 +168,7 @@ def from_files( target: Optional[str] = None, filetype: str = "csv", backbone: str = "sshleifer/tiny-mbart", - valid_file: Optional[str] = None, + val_file: Optional[str] = None, test_file: Optional[str] = None, predict_file: Optional[str] = None, max_source_length: int = 128, @@ -185,7 +185,7 @@ def from_files( target: The field storing the target translation text. filetype: ``csv`` or ``json`` File backbone: Tokenizer backbone to use, can use any HuggingFace tokenizer. - valid_file: Path to validation data. + val_file: Path to validation data. test_file: Path to test data. max_source_length: Maximum length of the source text. Any text longer will be truncated. max_target_length: Maximum length of the target text. Any text longer will be truncated. 
@@ -217,7 +217,7 @@ def from_files( return cls.from_load_data_inputs( train_load_data_input=train_file, - valid_load_data_input=valid_file, + val_load_data_input=val_file, test_load_data_input=test_file, predict_load_data_input=predict_file, batch_size=batch_size, diff --git a/flash/text/seq2seq/summarization/data.py b/flash/text/seq2seq/summarization/data.py index ba9b93b6e0..b6ecfc05df 100644 --- a/flash/text/seq2seq/summarization/data.py +++ b/flash/text/seq2seq/summarization/data.py @@ -54,7 +54,7 @@ def from_files( target: Optional[str] = None, filetype: str = "csv", backbone: str = "t5-small", - valid_file: str = None, + val_file: str = None, test_file: str = None, predict_file: str = None, max_source_length: int = 512, @@ -73,7 +73,7 @@ def from_files( target: The field storing the target translation text. filetype: .csv or .json backbone: Tokenizer backbone to use, can use any HuggingFace tokenizer. - valid_file: Path to validation data. + val_file: Path to validation data. test_file: Path to test data. max_source_length: Maximum length of the source text. Any text longer will be truncated. max_target_length: Maximum length of the target text. Any text longer will be truncated. @@ -110,7 +110,7 @@ def from_files( return cls.from_load_data_inputs( train_load_data_input=train_file, - valid_load_data_input=valid_file, + val_load_data_input=val_file, test_load_data_input=test_file, predict_load_data_input=predict_file, batch_size=batch_size, diff --git a/flash/text/seq2seq/translation/data.py b/flash/text/seq2seq/translation/data.py index 92096b431a..46bd9ebe8e 100644 --- a/flash/text/seq2seq/translation/data.py +++ b/flash/text/seq2seq/translation/data.py @@ -28,7 +28,7 @@ def from_files( target: Optional[str] = None, filetype="csv", backbone="facebook/mbart-large-en-ro", - valid_file=None, + val_file=None, test_file=None, predict_file=None, max_source_length: int = 128, @@ -46,7 +46,7 @@ def from_files( target: The field storing the target translation text. filetype: .csv or .json backbone: Tokenizer backbone to use, can use any HuggingFace tokenizer. - valid_file: Path to validation data. + val_file: Path to validation data. test_file: Path to test data. predict_file: Path to predict data. max_source_length: Maximum length of the source text. Any text longer will be truncated. 
@@ -70,7 +70,7 @@ def from_files( """ return super().from_files( train_file=train_file, - valid_file=valid_file, + val_file=val_file, test_file=test_file, predict_file=predict_file, input=input, diff --git a/flash/vision/classification/data.py b/flash/vision/classification/data.py index 35bb54e60f..37baff9440 100644 --- a/flash/vision/classification/data.py +++ b/flash/vision/classification/data.py @@ -177,7 +177,7 @@ def train_pre_tensor_transform(self, sample: Any) -> Any: def val_pre_tensor_transform(self, sample: Any) -> Any: source, target = sample - return self.common_pre_tensor_transform(source, self.valid_transform), target + return self.common_pre_tensor_transform(source, self.val_transform), target def test_pre_tensor_transform(self, sample: Any) -> Any: source, target = sample @@ -206,7 +206,7 @@ def train_post_tensor_transform(self, sample: Any) -> Any: def val_post_tensor_transform(self, sample: Any) -> Any: source, target = sample - return self.common_post_tensor_transform(source, self.valid_transform), target + return self.common_post_tensor_transform(source, self.val_transform), target def test_post_tensor_transform(self, sample: Any) -> Any: source, target = sample @@ -229,27 +229,27 @@ class ImageClassificationData(DataModule): def __init__( self, train_dataset: Optional[Dataset] = None, - valid_dataset: Optional[Dataset] = None, + val_dataset: Optional[Dataset] = None, test_dataset: Optional[Dataset] = None, predict_dataset: Optional[Dataset] = None, batch_size: int = 1, num_workers: Optional[int] = None, seed: int = 1234, train_split: Optional[Union[float, int]] = None, - valid_split: Optional[Union[float, int]] = None, + val_split: Optional[Union[float, int]] = None, test_split: Optional[Union[float, int]] = None, **kwargs, ) -> 'ImageClassificationData': """Creates a ImageClassificationData object from lists of image filepaths and labels""" - if train_dataset is not None and train_split is not None or valid_split is not None or test_split is not None: - train_dataset, valid_dataset, test_dataset = self.train_valid_test_split( - train_dataset, train_split, valid_split, test_split, seed + if train_dataset is not None and train_split is not None or val_split is not None or test_split is not None: + train_dataset, val_dataset, test_dataset = self.train_val_test_split( + train_dataset, train_split, val_split, test_split, seed ) super().__init__( train_dataset=train_dataset, - valid_dataset=valid_dataset, + val_dataset=val_dataset, test_dataset=test_dataset, predict_dataset=predict_dataset, batch_size=batch_size, @@ -261,8 +261,8 @@ def __init__( if self._train_ds: self.set_dataset_attribute(self._train_ds, 'num_classes', self.num_classes) - if self._valid_ds: - self.set_dataset_attribute(self._valid_ds, 'num_classes', self.num_classes) + if self._val_ds: + self.set_dataset_attribute(self._val_ds, 'num_classes', self.num_classes) if self._test_ds: self.set_dataset_attribute(self._test_ds, 'num_classes', self.num_classes) @@ -298,7 +298,7 @@ def default_train_transforms(): } @staticmethod - def default_valid_transforms(): + def default_val_transforms(): image_size = ImageClassificationData.image_size if _KORNIA_AVAILABLE and not os.getenv("FLASH_TESTING", "0") == "1": # Better approach as all transforms are applied on tensor directly @@ -334,7 +334,7 @@ def _get_num_classes(self, dataset: torch.utils.data.Dataset): def instantiate_preprocess( cls, train_transform: Dict[str, Union[nn.Module, Callable]], - valid_transform: Dict[str, Union[nn.Module, Callable]], + 
val_transform: Dict[str, Union[nn.Module, Callable]], test_transform: Dict[str, Union[nn.Module, Callable]], predict_transform: Dict[str, Union[nn.Module, Callable]], preprocess_cls: Type[Preprocess] = None @@ -344,7 +344,7 @@ def instantiate_preprocess( Args: train_transform: Train transforms for images. - valid_transform: Validation transforms for images. + val_transform: Validation transforms for images. test_transform: Test transforms for images. predict_transform: Predict transforms for images. preprocess_cls: User provided preprocess_cls. @@ -362,18 +362,18 @@ def instantiate_preprocess( } """ - train_transform, valid_transform, test_transform, predict_transform = cls._resolve_transforms( - train_transform, valid_transform, test_transform, predict_transform + train_transform, val_transform, test_transform, predict_transform = cls._resolve_transforms( + train_transform, val_transform, test_transform, predict_transform ) preprocess_cls = preprocess_cls or cls.preprocess_cls - return preprocess_cls(train_transform, valid_transform, test_transform, predict_transform) + return preprocess_cls(train_transform, val_transform, test_transform, predict_transform) @classmethod def _resolve_transforms( cls, train_transform: Optional[Union[str, Dict]] = 'default', - valid_transform: Optional[Union[str, Dict]] = 'default', + val_transform: Optional[Union[str, Dict]] = 'default', test_transform: Optional[Union[str, Dict]] = 'default', predict_transform: Optional[Union[str, Dict]] = 'default', ): @@ -381,17 +381,17 @@ def _resolve_transforms( if not train_transform or train_transform == 'default': train_transform = cls.default_train_transforms() - if not valid_transform or valid_transform == 'default': - valid_transform = cls.default_valid_transforms() + if not val_transform or val_transform == 'default': + val_transform = cls.default_val_transforms() if not test_transform or test_transform == 'default': - test_transform = cls.default_valid_transforms() + test_transform = cls.default_val_transforms() if not predict_transform or predict_transform == 'default': - predict_transform = cls.default_valid_transforms() + predict_transform = cls.default_val_transforms() return ( - cls._check_transforms(train_transform), cls._check_transforms(valid_transform), + cls._check_transforms(train_transform), cls._check_transforms(val_transform), cls._check_transforms(test_transform), cls._check_transforms(predict_transform) ) @@ -399,11 +399,11 @@ def _resolve_transforms( def from_folders( cls, train_folder: Optional[Union[str, pathlib.Path]] = None, - valid_folder: Optional[Union[str, pathlib.Path]] = None, + val_folder: Optional[Union[str, pathlib.Path]] = None, test_folder: Optional[Union[str, pathlib.Path]] = None, predict_folder: Union[str, pathlib.Path] = None, train_transform: Optional[Union[str, Dict]] = 'default', - valid_transform: Optional[Union[str, Dict]] = 'default', + val_transform: Optional[Union[str, Dict]] = 'default', test_transform: Optional[Union[str, Dict]] = 'default', predict_transform: Optional[Union[str, Dict]] = 'default', batch_size: int = 4, @@ -423,12 +423,12 @@ def from_folders( Args: train_folder: Path to training folder. Default: None. - valid_folder: Path to validation folder. Default: None. + val_folder: Path to validation folder. Default: None. test_folder: Path to test folder. Default: None. predict_folder: Path to predict folder. Default: None. - valid_transform: Image transform to use for validation and test set. 
+ val_transform: Image transform to use for validation and test set. train_transform: Image transform to use for training set. - valid_transform: Image transform to use for validation set. + val_transform: Image transform to use for validation set. test_transform: Image transform to use for test set. predict_transform: Image transform to use for predict set. batch_size: Batch size for data loading. @@ -444,7 +444,7 @@ def from_folders( """ preprocess = cls.instantiate_preprocess( train_transform, - valid_transform, + val_transform, test_transform, predict_transform, preprocess_cls=preprocess_cls, @@ -452,7 +452,7 @@ def from_folders( return cls.from_load_data_inputs( train_load_data_input=train_folder, - valid_load_data_input=valid_folder, + val_load_data_input=val_folder, test_load_data_input=test_folder, predict_load_data_input=predict_folder, batch_size=batch_size, @@ -466,13 +466,13 @@ def from_filepaths( cls, train_filepaths: Optional[Union[str, pathlib.Path, Sequence[Union[str, pathlib.Path]]]] = None, train_labels: Optional[Sequence] = None, - valid_filepaths: Optional[Union[str, pathlib.Path, Sequence[Union[str, pathlib.Path]]]] = None, - valid_labels: Optional[Sequence] = None, + val_filepaths: Optional[Union[str, pathlib.Path, Sequence[Union[str, pathlib.Path]]]] = None, + val_labels: Optional[Sequence] = None, test_filepaths: Optional[Union[str, pathlib.Path, Sequence[Union[str, pathlib.Path]]]] = None, test_labels: Optional[Sequence] = None, predict_filepaths: Optional[Union[str, pathlib.Path, Sequence[Union[str, pathlib.Path]]]] = None, train_transform: Optional[Callable] = 'default', - valid_transform: Optional[Callable] = 'default', + val_transform: Optional[Callable] = 'default', batch_size: int = 64, num_workers: Optional[int] = None, seed: Optional[int] = 42, @@ -491,17 +491,17 @@ def from_filepaths( Args: train_filepaths: String or sequence of file paths for training dataset. Defaults to ``None``. train_labels: Sequence of labels for training dataset. Defaults to ``None``. - valid_filepaths: String or sequence of file paths for validation dataset. Defaults to ``None``. - valid_labels: Sequence of labels for validation dataset. Defaults to ``None``. + val_filepaths: String or sequence of file paths for validation dataset. Defaults to ``None``. + val_labels: Sequence of labels for validation dataset. Defaults to ``None``. test_filepaths: String or sequence of file paths for test dataset. Defaults to ``None``. test_labels: Sequence of labels for test dataset. Defaults to ``None``. train_transform: Transforms for training dataset. Defaults to ``default``, which loads imagenet transforms. - valid_transform: Transforms for validation and testing dataset. + val_transform: Transforms for validation and testing dataset. Defaults to ``default``, which loads imagenet transforms. batch_size: The batchsize to use for parallel loading. Defaults to ``64``. num_workers: The number of workers to use for parallelized loading. Defaults to ``None`` which equals the number of available CPU threads. - seed: Used for the train/val splits when valid_split is not None. + seed: Used for the train/val splits. Returns: ImageClassificationData: The constructed data module. 
@@ -512,15 +512,15 @@ def from_filepaths( Example when labels are in .csv file:: train_labels = labels_from_categorical_csv('path/to/train.csv', 'my_id') - valid_labels = labels_from_categorical_csv(path/to/valid.csv', 'my_id') + val_labels = labels_from_categorical_csv(path/to/val.csv', 'my_id') test_labels = labels_from_categorical_csv(path/to/tests.csv', 'my_id') data = ImageClassificationData.from_filepaths( batch_size=2, train_filepaths='path/to/train', train_labels=train_labels, - valid_filepaths='path/to/valid', - valid_labels=valid_labels, + val_filepaths='path/to/val', + val_labels=val_labels, test_filepaths='path/to/test', test_labels=test_labels, ) @@ -532,11 +532,11 @@ def from_filepaths( train_filepaths = [os.path.join(train_filepaths, x) for x in os.listdir(train_filepaths)] else: train_filepaths = [train_filepaths] - if isinstance(valid_filepaths, str): - if os.path.isdir(valid_filepaths): - valid_filepaths = [os.path.join(valid_filepaths, x) for x in os.listdir(valid_filepaths)] + if isinstance(val_filepaths, str): + if os.path.isdir(val_filepaths): + val_filepaths = [os.path.join(val_filepaths, x) for x in os.listdir(val_filepaths)] else: - valid_filepaths = [valid_filepaths] + val_filepaths = [val_filepaths] if isinstance(test_filepaths, str): if os.path.isdir(test_filepaths): test_filepaths = [os.path.join(test_filepaths, x) for x in os.listdir(test_filepaths)] @@ -555,12 +555,12 @@ def from_filepaths( else: train_dataset = None - if valid_filepaths is not None and valid_labels is not None: - valid_dataset = cls._generate_dataset_if_possible( - list(zip(valid_filepaths, valid_labels)), running_stage=RunningStage.VALIDATING + if val_filepaths is not None and val_labels is not None: + val_dataset = cls._generate_dataset_if_possible( + list(zip(val_filepaths, val_labels)), running_stage=RunningStage.VALIDATING ) else: - valid_dataset = None + val_dataset = None if test_filepaths is not None and test_labels is not None: test_dataset = cls._generate_dataset_if_possible( @@ -578,11 +578,11 @@ def from_filepaths( return cls( train_dataset=train_dataset, - valid_dataset=valid_dataset, + val_dataset=val_dataset, test_dataset=test_dataset, predict_dataset=predict_dataset, train_transform=train_transform, - valid_transform=valid_transform, + val_transform=val_transform, batch_size=batch_size, num_workers=num_workers, seed=seed, diff --git a/flash/vision/classification/model.py b/flash/vision/classification/model.py index f3774616c4..2182bfbc6f 100644 --- a/flash/vision/classification/model.py +++ b/flash/vision/classification/model.py @@ -60,7 +60,7 @@ class ImageClassifier(ClassificationTask): @property def preprocess(self): - return ImageClassificationPreprocess(predict_transform=ImageClassificationData.default_valid_transforms()) + return ImageClassificationPreprocess(predict_transform=ImageClassificationData.default_val_transforms()) def __init__( self, diff --git a/flash/vision/detection/data.py b/flash/vision/detection/data.py index b3a32b3a35..cc85910ee2 100644 --- a/flash/vision/detection/data.py +++ b/flash/vision/detection/data.py @@ -183,12 +183,12 @@ class ObjectDetectionData(DataModule): def instantiate_preprocess( cls, train_transform: Optional[Callable], - valid_transform: Optional[Callable], + val_transform: Optional[Callable], preprocess_cls: Type[Preprocess] = None ) -> Preprocess: preprocess_cls = preprocess_cls or cls.preprocess_cls - return preprocess_cls(train_transform, valid_transform) + return preprocess_cls(train_transform, val_transform) @classmethod def 
from_coco( @@ -196,9 +196,9 @@ def from_coco( train_folder: Optional[str] = None, train_ann_file: Optional[str] = None, train_transform: Optional[Callable] = _default_transform, - valid_folder: Optional[str] = None, - valid_ann_file: Optional[str] = None, - valid_transform: Optional[Callable] = _default_transform, + val_folder: Optional[str] = None, + val_ann_file: Optional[str] = None, + val_transform: Optional[Callable] = _default_transform, test_folder: Optional[str] = None, test_ann_file: Optional[str] = None, test_transform: Optional[Callable] = _default_transform, @@ -208,11 +208,11 @@ def from_coco( **kwargs ): - preprocess = cls.instantiate_preprocess(train_transform, valid_transform, preprocess_cls=preprocess_cls) + preprocess = cls.instantiate_preprocess(train_transform, val_transform, preprocess_cls=preprocess_cls) datamodule = cls.from_load_data_inputs( train_load_data_input=(train_folder, train_ann_file, train_transform), - valid_load_data_input=(valid_folder, valid_ann_file, valid_transform) if valid_folder else None, + val_load_data_input=(val_folder, val_ann_file, val_transform) if val_folder else None, test_load_data_input=(test_folder, test_ann_file, test_transform) if test_folder else None, batch_size=batch_size, num_workers=num_workers, diff --git a/flash/vision/embedding/model.py b/flash/vision/embedding/model.py index 67cca42b4b..2611613a26 100644 --- a/flash/vision/embedding/model.py +++ b/flash/vision/embedding/model.py @@ -49,7 +49,7 @@ class ImageEmbedder(Task): @property def preprocess(self): - return ImageClassificationPreprocess(predict_transform=ImageClassificationData.default_valid_transforms()) + return ImageClassificationPreprocess(predict_transform=ImageClassificationData.default_val_transforms()) def __init__( self, diff --git a/flash_examples/finetuning/image_classification.py b/flash_examples/finetuning/image_classification.py index a21090f66c..4a4032ac5d 100644 --- a/flash_examples/finetuning/image_classification.py +++ b/flash_examples/finetuning/image_classification.py @@ -23,7 +23,7 @@ # 2. Load the data datamodule = ImageClassificationData.from_folders( train_folder="data/hymenoptera_data/train/", - valid_folder="data/hymenoptera_data/val/", + val_folder="data/hymenoptera_data/val/", test_folder="data/hymenoptera_data/test/", ) # 3. Build the model diff --git a/flash_examples/finetuning/summarization.py b/flash_examples/finetuning/summarization.py index d25efa697a..08e3f63f4b 100644 --- a/flash_examples/finetuning/summarization.py +++ b/flash_examples/finetuning/summarization.py @@ -23,7 +23,7 @@ # 2. Load the data datamodule = SummarizationData.from_files( train_file="data/xsum/train.csv", - valid_file="data/xsum/valid.csv", + val_file="data/xsum/valid.csv", test_file="data/xsum/test.csv", input="input", target="target" diff --git a/flash_examples/finetuning/text_classification.py b/flash_examples/finetuning/text_classification.py index 0c02be354b..f5977ae113 100644 --- a/flash_examples/finetuning/text_classification.py +++ b/flash_examples/finetuning/text_classification.py @@ -21,7 +21,7 @@ # 2. 
Load the data datamodule = TextClassificationData.from_files( train_file="data/imdb/train.csv", - valid_file="data/imdb/valid.csv", + val_file="data/imdb/valid.csv", test_file="data/imdb/test.csv", input="review", target="sentiment", diff --git a/flash_examples/finetuning/translation.py b/flash_examples/finetuning/translation.py index c057ec4790..fe3e0a3f24 100644 --- a/flash_examples/finetuning/translation.py +++ b/flash_examples/finetuning/translation.py @@ -23,7 +23,7 @@ # 2. Load the data datamodule = TranslationData.from_files( train_file="data/wmt_en_ro/train.csv", - valid_file="data/wmt_en_ro/valid.csv", + val_file="data/wmt_en_ro/valid.csv", test_file="data/wmt_en_ro/test.csv", input="input", target="target", diff --git a/flash_notebooks/image_classification.ipynb b/flash_notebooks/image_classification.ipynb index 4bf6ba2aae..cb25a201ca 100644 --- a/flash_notebooks/image_classification.ipynb +++ b/flash_notebooks/image_classification.ipynb @@ -134,7 +134,7 @@ "source": [ "datamodule = ImageClassificationData.from_folders(\n", " train_folder=\"data/hymenoptera_data/train/\",\n", - " valid_folder=\"data/hymenoptera_data/val/\",\n", + " val_folder=\"data/hymenoptera_data/val/\",\n", " test_folder=\"data/hymenoptera_data/test/\",\n", ")" ] diff --git a/flash_notebooks/text_classification.ipynb b/flash_notebooks/text_classification.ipynb index 9ad20120ee..f238c09b60 100644 --- a/flash_notebooks/text_classification.ipynb +++ b/flash_notebooks/text_classification.ipynb @@ -111,7 +111,7 @@ "source": [ "datamodule = TextClassificationData.from_files(\n", " train_file=\"data/imdb/train.csv\",\n", - " valid_file=\"data/imdb/valid.csv\",\n", + " val_file=\"data/imdb/valid.csv\",\n", " test_file=\"data/imdb/test.csv\",\n", " input=\"review\",\n", " target=\"sentiment\",\n", diff --git a/tests/data/test_data_pipeline.py b/tests/data/test_data_pipeline.py index 68d10cf151..7aac65b07a 100644 --- a/tests/data/test_data_pipeline.py +++ b/tests/data/test_data_pipeline.py @@ -59,7 +59,7 @@ class CustomDataModule(DataModule): def __init__(self): super().__init__( train_dataset=DummyDataset(), - valid_dataset=DummyDataset(), + val_dataset=DummyDataset(), test_dataset=DummyDataset(), predict_dataset=DummyDataset(), ) @@ -720,7 +720,7 @@ def from_folders( # call ``from_load_data_inputs`` return cls.from_load_data_inputs( train_load_data_input=train_folder, - valid_load_data_input=val_folder, + val_load_data_input=val_folder, test_load_data_input=test_folder, predict_load_data_input=predict_folder, batch_size=batch_size diff --git a/tests/tabular/data/test_data.py b/tests/tabular/data/test_data.py index 65e04699a9..1a181a5487 100644 --- a/tests/tabular/data/test_data.py +++ b/tests/tabular/data/test_data.py @@ -83,14 +83,14 @@ def test_emb_sizes(): def test_tabular_data(tmpdir): train_df = TEST_DF_1.copy() - valid_df = TEST_DF_2.copy() + val_df = TEST_DF_2.copy() test_df = TEST_DF_2.copy() dm = TabularData.from_df( train_df, categorical_cols=["category"], numerical_cols=["scalar_b", "scalar_b"], target_col="label", - valid_df=valid_df, + val_df=val_df, test_df=test_df, num_workers=0, batch_size=1, @@ -104,9 +104,9 @@ def test_tabular_data(tmpdir): def test_categorical_target(tmpdir): train_df = TEST_DF_1.copy() - valid_df = TEST_DF_2.copy() + val_df = TEST_DF_2.copy() test_df = TEST_DF_2.copy() - for df in [train_df, valid_df, test_df]: + for df in [train_df, val_df, test_df]: # change int label to string df["label"] = df["label"].astype(str) @@ -115,7 +115,7 @@ def test_categorical_target(tmpdir): 
categorical_cols=["category"], numerical_cols=["scalar_b", "scalar_b"], target_col="label", - valid_df=valid_df, + val_df=val_df, test_df=test_df, num_workers=0, batch_size=1, @@ -129,14 +129,14 @@ def test_categorical_target(tmpdir): def test_from_df(tmpdir): train_df = TEST_DF_1.copy() - valid_df = TEST_DF_2.copy() + val_df = TEST_DF_2.copy() test_df = TEST_DF_2.copy() dm = TabularData.from_df( train_df, categorical_cols=["category"], numerical_cols=["scalar_b", "scalar_b"], target_col="label", - valid_df=valid_df, + val_df=val_df, test_df=test_df, num_workers=0, batch_size=1 @@ -150,9 +150,9 @@ def test_from_df(tmpdir): def test_from_csv(tmpdir): train_csv = Path(tmpdir) / "train.csv" - valid_csv = test_csv = Path(tmpdir) / "valid.csv" + val_csv = test_csv = Path(tmpdir) / "valid.csv" TEST_DF_1.to_csv(train_csv) - TEST_DF_2.to_csv(valid_csv) + TEST_DF_2.to_csv(val_csv) TEST_DF_2.to_csv(test_csv) dm = TabularData.from_csv( @@ -160,7 +160,7 @@ def test_from_csv(tmpdir): categorical_cols=["category"], numerical_cols=["scalar_b", "scalar_b"], target_col="label", - valid_csv=valid_csv, + val_csv=val_csv, test_csv=test_csv, num_workers=0, batch_size=1 diff --git a/tests/tabular/test_data_model_integration.py b/tests/tabular/test_data_model_integration.py index 6963c561e9..6c022eba0f 100644 --- a/tests/tabular/test_data_model_integration.py +++ b/tests/tabular/test_data_model_integration.py @@ -29,14 +29,14 @@ def test_classification(tmpdir): train_df = TEST_DF_1.copy() - valid_df = TEST_DF_1.copy() + val_df = TEST_DF_1.copy() test_df = TEST_DF_1.copy() data = TabularData.from_df( train_df, categorical_cols=["category"], numerical_cols=["scalar_a", "scalar_b"], target_col="label", - valid_df=valid_df, + val_df=val_df, test_df=test_df, num_workers=0, batch_size=2, diff --git a/tests/text/classification/test_data.py b/tests/text/classification/test_data.py index fdd28be60b..3df3360030 100644 --- a/tests/text/classification/test_data.py +++ b/tests/text/classification/test_data.py @@ -62,7 +62,7 @@ def test_test_valid(tmpdir): dm = TextClassificationData.from_files( backbone=TEST_BACKBONE, train_file=csv_path, - valid_file=csv_path, + val_file=csv_path, test_file=csv_path, input="sentence", target="label", diff --git a/tests/text/summarization/test_data.py b/tests/text/summarization/test_data.py index 4399068085..616a9d6f53 100644 --- a/tests/text/summarization/test_data.py +++ b/tests/text/summarization/test_data.py @@ -62,7 +62,7 @@ def test_from_files(tmpdir): dm = SummarizationData.from_files( backbone=TEST_BACKBONE, train_file=csv_path, - valid_file=csv_path, + val_file=csv_path, test_file=csv_path, input="input", target="target", diff --git a/tests/text/translation/test_data.py b/tests/text/translation/test_data.py index 6ac5eba425..d9e17105ce 100644 --- a/tests/text/translation/test_data.py +++ b/tests/text/translation/test_data.py @@ -62,7 +62,7 @@ def test_from_files(tmpdir): dm = TranslationData.from_files( backbone=TEST_BACKBONE, train_file=csv_path, - valid_file=csv_path, + val_file=csv_path, test_file=csv_path, input="input", target="target", diff --git a/tests/vision/classification/test_data.py b/tests/vision/classification/test_data.py index 499f32627c..5eb8827dbd 100644 --- a/tests/vision/classification/test_data.py +++ b/tests/vision/classification/test_data.py @@ -75,9 +75,9 @@ def test_from_filepaths(tmpdir): train_filepaths=[tmpdir / "a", tmpdir / "b"], train_labels=[0, 1], train_transform=None, - valid_filepaths=[tmpdir / "c", tmpdir / "d"], - valid_labels=[0, 1], - 
valid_transform=None, + val_filepaths=[tmpdir / "c", tmpdir / "d"], + val_labels=[0, 1], + val_transform=None, test_transform=None, test_filepaths=[tmpdir / "e", tmpdir / "f"], test_labels=[0, 1], @@ -105,8 +105,8 @@ def test_categorical_csv_labels(tmpdir): _rand_image().save(train_dir / "train" / "train_2.png") (train_dir / "valid").mkdir() - _rand_image().save(train_dir / "valid" / "valid_1.png") - _rand_image().save(train_dir / "valid" / "valid_2.png") + _rand_image().save(train_dir / "valid" / "val_1.png") + _rand_image().save(train_dir / "valid" / "val_2.png") (train_dir / "test").mkdir() _rand_image().save(train_dir / "test" / "test_1.png") @@ -119,11 +119,9 @@ def test_categorical_csv_labels(tmpdir): ) text_file.close() - valid_csv = os.path.join(tmpdir, 'some_dataset', 'valid.csv') - text_file = open(valid_csv, 'w') - text_file.write( - 'my_id,label_a,label_b,label_c\n"valid_1.png", 0, 1, 0\n"valid_2.png", 0, 0, 1\n"valid_3.png", 1, 0, 0\n' - ) + val_csv = os.path.join(tmpdir, 'some_dataset', 'valid.csv') + text_file = open(val_csv, 'w') + text_file.write('my_id,label_a,label_b,label_c\n"val_1.png", 0, 1, 0\n"val_2.png", 0, 0, 1\n"val_3.png", 1, 0, 0\n') text_file.close() test_csv = os.path.join(tmpdir, 'some_dataset', 'test.csv') @@ -139,8 +137,8 @@ def index_col_collate_fn(x): train_labels = labels_from_categorical_csv( train_csv, 'my_id', feature_cols=['label_a', 'label_b', 'label_c'], index_col_collate_fn=index_col_collate_fn ) - valid_labels = labels_from_categorical_csv( - valid_csv, 'my_id', feature_cols=['label_a', 'label_b', 'label_c'], index_col_collate_fn=index_col_collate_fn + val_labels = labels_from_categorical_csv( + val_csv, 'my_id', feature_cols=['label_a', 'label_b', 'label_c'], index_col_collate_fn=index_col_collate_fn ) test_labels = labels_from_categorical_csv( test_csv, 'my_id', feature_cols=['label_a', 'label_b', 'label_c'], index_col_collate_fn=index_col_collate_fn @@ -148,12 +146,12 @@ def index_col_collate_fn(x): data = ImageClassificationData.from_filepaths( batch_size=2, train_transform=None, - valid_transform=None, + val_transform=None, test_transform=None, train_filepaths=os.path.join(tmpdir, 'some_dataset', 'train'), train_labels=train_labels.values(), - valid_filepaths=os.path.join(tmpdir, 'some_dataset', 'valid'), - valid_labels=valid_labels.values(), + val_filepaths=os.path.join(tmpdir, 'some_dataset', 'valid'), + val_labels=val_labels.values(), test_filepaths=os.path.join(tmpdir, 'some_dataset', 'test'), test_labels=test_labels.values(), ) @@ -193,7 +191,7 @@ def test_from_folders(tmpdir): img_data = ImageClassificationData.from_folders( train_dir, - valid_folder=train_dir, + val_folder=train_dir, test_folder=train_dir, batch_size=1, num_workers=0, diff --git a/tests/vision/detection/test_data.py b/tests/vision/detection/test_data.py index 10b242dc4d..fec4b9a5e8 100644 --- a/tests/vision/detection/test_data.py +++ b/tests/vision/detection/test_data.py @@ -96,8 +96,8 @@ def test_image_detector_data_from_coco(tmpdir): datamodule = ObjectDetectionData.from_coco( train_folder=train_folder, train_ann_file=coco_ann_path, - valid_folder=train_folder, - valid_ann_file=coco_ann_path, + val_folder=train_folder, + val_ann_file=coco_ann_path, test_folder=train_folder, test_ann_file=coco_ann_path, batch_size=1,
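Usage sketch of the renamed API after this change (a minimal sketch, not part of the diff: folder paths are placeholders copied from the README hunk, the toy dataset is illustrative, and imports follow the module paths touched above):

import torch
from torch.utils.data import TensorDataset

from flash.data.data_module import DataModule
from flash.vision.classification.data import ImageClassificationData

# Data modules now accept val_* keyword arguments (previously valid_*).
datamodule = ImageClassificationData.from_folders(
    train_folder="data/hymenoptera_data/train/",
    val_folder="data/hymenoptera_data/val/",   # was valid_folder
    test_folder="data/hymenoptera_data/test/",
)

# The split helper is renamed the same way (train_valid_test_split -> train_val_test_split);
# a toy TensorDataset stands in for a real dataset here.
dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))
train_ds, val_ds, test_ds = DataModule.train_val_test_split(
    dataset, val_split=0.1, test_split=0.1,    # was valid_split
)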