From a0167fa644046f4ee230a62a7a5f110ef9682a8b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 19:54:37 -0500 Subject: [PATCH 01/14] added .csv image loading utils --- .../source/reference/image_classification.rst | 1 + flash/core/data/utils.py | 2 +- flash/vision/classification/data.py | 55 +++++++++++++++--- tests/vision/classification/test_data.py | 57 +++++++++++++++++++ 4 files changed, 106 insertions(+), 9 deletions(-) diff --git a/docs/source/reference/image_classification.rst b/docs/source/reference/image_classification.rst index b713d4d309..dc158c1320 100644 --- a/docs/source/reference/image_classification.rst +++ b/docs/source/reference/image_classification.rst @@ -158,6 +158,7 @@ Available backbones: * densenet121 * densenet169 * densenet161 +* swav-imagenet ------ diff --git a/flash/core/data/utils.py b/flash/core/data/utils.py index a497b5f7b4..3fce2e5073 100644 --- a/flash/core/data/utils.py +++ b/flash/core/data/utils.py @@ -19,7 +19,7 @@ import requests import torch from tqdm.auto import tqdm as tq - +import pandas as pd # Code taken from: https://gist.github.com/ruxi/5d6803c116ec1130d484a4ab8c00c603 # __author__ = "github.com/ruxi" diff --git a/flash/vision/classification/data.py b/flash/vision/classification/data.py index 0de53293b2..d4f25b46f7 100644 --- a/flash/vision/classification/data.py +++ b/flash/vision/classification/data.py @@ -15,6 +15,7 @@ import pathlib from typing import Any, Callable, List, Optional, Sequence, Tuple, Union +import pandas as pd import torch from PIL import Image from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -55,9 +56,13 @@ def __init__( self.labels = labels or [] self.transform = transform self.loader = loader - if self.has_labels: + if not self.has_dict_labels and self.has_labels: self.label_to_class_mapping = dict(map(reversed, enumerate(sorted(set(self.labels))))) + @property + def has_dict_labels(self) -> bool: + return isinstance(self.labels, dict) + @property def has_labels(self) -> bool: return self.labels is not None @@ -71,7 +76,11 @@ def __getitem__(self, index: int) -> Tuple[Any, Optional[int]]: if self.transform is not None: img = self.transform(img) label = None - if self.has_labels: + if self.has_dict_labels: + name = os.path.basename(filename) + label = self.labels[name] + + elif self.has_labels: label = self.labels[index] label = self.label_to_class_mapping[label] return img, label @@ -244,13 +253,13 @@ class ImageClassificationData(DataModule): @classmethod def from_filepaths( cls, - train_filepaths: Optional[Sequence[Union[str, pathlib.Path]]] = None, + train_filepaths: Union[str, Optional[Sequence[Union[str, pathlib.Path]]]] = None, train_labels: Optional[Sequence] = None, train_transform: Optional[Callable] = _default_train_transforms, - valid_filepaths: Optional[Sequence[Union[str, pathlib.Path]]] = None, + valid_filepaths: Union[str, Optional[Sequence[Union[str, pathlib.Path]]]] = None, valid_labels: Optional[Sequence] = None, valid_transform: Optional[Callable] = _default_valid_transforms, - test_filepaths: Optional[Sequence[Union[str, pathlib.Path]]] = None, + test_filepaths: Union[str, Optional[Sequence[Union[str, pathlib.Path]]]] = None, test_labels: Optional[Sequence] = None, loader: Callable = _pil_loader, batch_size: int = 64, @@ -260,13 +269,13 @@ def from_filepaths( """Creates a ImageClassificationData object from lists of image filepaths and labels Args: - train_filepaths: sequence of file paths for training dataset. Defaults to ``None``. + train_filepaths: string or sequence of file paths for training dataset. Defaults to ``None``. train_labels: sequence of labels for training dataset. Defaults to ``None``. train_transform: transforms for training dataset. Defaults to ``None``. - valid_filepaths: sequence of file paths for validation dataset. Defaults to ``None``. + valid_filepaths: string or sequence of file paths for validation dataset. Defaults to ``None``. valid_labels: sequence of labels for validation dataset. Defaults to ``None``. valid_transform: transforms for validation and testing dataset. Defaults to ``None``. - test_filepaths: sequence of file paths for test dataset. Defaults to ``None``. + test_filepaths: string or sequence of file paths for test dataset. Defaults to ``None``. test_labels: sequence of labels for test dataset. Defaults to ``None``. loader: function to load an image file. Defaults to ``None``. batch_size: the batchsize to use for parallel loading. Defaults to ``64``. @@ -278,7 +287,32 @@ def from_filepaths( Examples: >>> img_data = ImageClassificationData.from_filepaths(["a.png", "b.png"], [0, 1]) # doctest: +SKIP + + Example when labels are in .csv file: + + train_labels = labels_from_categorical_csv('path/to/train.csv', 'my_id') + valid_labels = labels_from_categorical_csv(path/to/valid.csv', 'my_id') + test_labels = labels_from_categorical_csv(path/to/tests.csv', 'my_id') + + data = ImageClassificationData.from_filepaths( + batch_size=2, + train_filepaths='path/to/train', + train_labels=train_labels, + valid_filepaths='path/to/valid', + valid_labels=valid_labels, + test_filepaths='path/to/test', + test_labels=test_labels, + ) + """ + # enable passing in a string which loads all files in that folder as a list + if isinstance(train_filepaths, str): + train_filepaths = [os.path.join(train_filepaths, x) for x in os.listdir(train_filepaths)] + if isinstance(valid_filepaths, str): + valid_filepaths = [os.path.join(valid_filepaths, x) for x in os.listdir(valid_filepaths)] + if isinstance(test_filepaths, str): + test_filepaths = [os.path.join(test_filepaths, x) for x in os.listdir(test_filepaths)] + train_ds = FilepathDataset( filepaths=train_filepaths, labels=train_labels, @@ -311,6 +345,11 @@ def from_filepaths( num_workers=num_workers, ) + @classmethod + def __auto_generate_labels_from_csv(cls, csv): + df = pd.read_csv(csv) + print('a') + @classmethod def from_folders( cls, diff --git a/tests/vision/classification/test_data.py b/tests/vision/classification/test_data.py index d53c98d841..2b5ec197bb 100644 --- a/tests/vision/classification/test_data.py +++ b/tests/vision/classification/test_data.py @@ -19,6 +19,8 @@ from torchvision import transforms as T from flash.vision import ImageClassificationData +import os +from flash.data import labels_from_categorical_csv def _dummy_image_loader(filepath): @@ -72,6 +74,61 @@ def test_from_filepaths(tmpdir): assert labels.shape == (1, ) +def test_categorical_csv_labels(tmpdir): + train_dir = Path(tmpdir / "some_dataset") + train_dir.mkdir() + + (train_dir / "train").mkdir() + _rand_image().save(train_dir / "train" / "train_1.png") + _rand_image().save(train_dir / "train" / "train_2.png") + + (train_dir / "valid").mkdir() + _rand_image().save(train_dir / "valid" / "valid_1.png") + _rand_image().save(train_dir / "valid" / "valid_2.png") + + (train_dir / "test").mkdir() + _rand_image().save(train_dir / "test" / "test_1.png") + _rand_image().save(train_dir / "test" / "test_2.png") + + train_csv = os.path.join(tmpdir, 'some_dataset', 'train.csv') + text_file = open(train_csv, 'w') + text_file.write('my_id, label_a, label_b, label_c\n"train_1.png", 0, 1, 0\n"train_2.png", 0, 0, 1\n"train_2.png", 1, 0, 0\n') + text_file.close() + + valid_csv = os.path.join(tmpdir, 'some_dataset', 'valid.csv') + text_file = open(valid_csv, 'w') + text_file.write('my_id, label_a, label_b, label_c\n"valid_1.png", 0, 1, 0\n"valid_2.png", 0, 0, 1\n"valid_3.png", 1, 0, 0\n') + text_file.close() + + test_csv = os.path.join(tmpdir, 'some_dataset', 'test.csv') + text_file = open(test_csv, 'w') + text_file.write('my_id, label_a, label_b, label_c\n"test_1.png", 0, 1, 0\n"test_2.png", 0, 0, 1\n"test_3.png", 1, 0, 0\n') + text_file.close() + + train_labels = labels_from_categorical_csv(train_csv, 'my_id') + valid_labels = labels_from_categorical_csv(valid_csv, 'my_id') + test_labels = labels_from_categorical_csv(test_csv, 'my_id') + + data = ImageClassificationData.from_filepaths( + batch_size=2, + train_filepaths=os.path.join(tmpdir, 'some_dataset', 'train'), + train_labels=train_labels, + valid_filepaths=os.path.join(tmpdir, 'some_dataset', 'valid'), + valid_labels=valid_labels, + test_filepaths=os.path.join(tmpdir, 'some_dataset', 'test'), + test_labels=test_labels, + ) + + for (x, y) in data.train_dataloader(): + assert len(x) == 2 + + for (x, y) in data.val_dataloader(): + assert len(x) == 2 + + for (x, y) in data.test_dataloader(): + assert len(x) == 2 + + def test_from_folders(tmpdir): train_dir = Path(tmpdir / "train") train_dir.mkdir() From 1659c43934a35b43a7b7cf40e471314b54554dd0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 19:56:58 -0500 Subject: [PATCH 02/14] added .csv image loading utils --- flash/vision/classification/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flash/vision/classification/data.py b/flash/vision/classification/data.py index d4f25b46f7..8d30263c8b 100644 --- a/flash/vision/classification/data.py +++ b/flash/vision/classification/data.py @@ -288,7 +288,7 @@ def from_filepaths( Examples: >>> img_data = ImageClassificationData.from_filepaths(["a.png", "b.png"], [0, 1]) # doctest: +SKIP - Example when labels are in .csv file: + Example when labels are in .csv file:: train_labels = labels_from_categorical_csv('path/to/train.csv', 'my_id') valid_labels = labels_from_categorical_csv(path/to/valid.csv', 'my_id') From b7942c1322967cad791884ee94e1ff59bbaa9524 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:05:05 -0500 Subject: [PATCH 03/14] added .csv image loading utils --- tests/vision/classification/test_data.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/vision/classification/test_data.py b/tests/vision/classification/test_data.py index 2b5ec197bb..337685934c 100644 --- a/tests/vision/classification/test_data.py +++ b/tests/vision/classification/test_data.py @@ -92,17 +92,20 @@ def test_categorical_csv_labels(tmpdir): train_csv = os.path.join(tmpdir, 'some_dataset', 'train.csv') text_file = open(train_csv, 'w') - text_file.write('my_id, label_a, label_b, label_c\n"train_1.png", 0, 1, 0\n"train_2.png", 0, 0, 1\n"train_2.png", 1, 0, 0\n') + text_file.write('my_id, label_a, label_b, label_c\n"train_1.png", 0, 1, 0\n' + '"train_2.png", 0, 0, 1\n"train_2.png", 1, 0, 0\n') text_file.close() valid_csv = os.path.join(tmpdir, 'some_dataset', 'valid.csv') text_file = open(valid_csv, 'w') - text_file.write('my_id, label_a, label_b, label_c\n"valid_1.png", 0, 1, 0\n"valid_2.png", 0, 0, 1\n"valid_3.png", 1, 0, 0\n') + text_file.write('my_id, label_a, label_b, label_c\n"valid_1.png", 0, 1, 0\n' + '"valid_2.png", 0, 0, 1\n"valid_3.png", 1, 0, 0\n') text_file.close() test_csv = os.path.join(tmpdir, 'some_dataset', 'test.csv') text_file = open(test_csv, 'w') - text_file.write('my_id, label_a, label_b, label_c\n"test_1.png", 0, 1, 0\n"test_2.png", 0, 0, 1\n"test_3.png", 1, 0, 0\n') + text_file.write('my_id, label_a, label_b, label_c\n"test_1.png", 0, 1, 0\n' + '"test_2.png", 0, 0, 1\n"test_3.png", 1, 0, 0\n') text_file.close() train_labels = labels_from_categorical_csv(train_csv, 'my_id') From e2aa2aa64020bbb463bde94e1c39b55aa7aa1213 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:09:53 -0500 Subject: [PATCH 04/14] added .csv image loading utils --- tests/vision/classification/test_data.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/vision/classification/test_data.py b/tests/vision/classification/test_data.py index 337685934c..c17ffa193e 100644 --- a/tests/vision/classification/test_data.py +++ b/tests/vision/classification/test_data.py @@ -92,20 +92,23 @@ def test_categorical_csv_labels(tmpdir): train_csv = os.path.join(tmpdir, 'some_dataset', 'train.csv') text_file = open(train_csv, 'w') - text_file.write('my_id, label_a, label_b, label_c\n"train_1.png", 0, 1, 0\n' - '"train_2.png", 0, 0, 1\n"train_2.png", 1, 0, 0\n') + text_file.write( + 'my_id, label_a, label_b, label_c\n"train_1.png", 0, 1, 0\n"train_2.png", 0, 0, 1\n"train_2.png", 1, 0, 0\n' + ) text_file.close() valid_csv = os.path.join(tmpdir, 'some_dataset', 'valid.csv') text_file = open(valid_csv, 'w') - text_file.write('my_id, label_a, label_b, label_c\n"valid_1.png", 0, 1, 0\n' - '"valid_2.png", 0, 0, 1\n"valid_3.png", 1, 0, 0\n') + text_file.write( + 'my_id, label_a, label_b, label_c\n"valid_1.png", 0, 1, 0\n"valid_2.png", 0, 0, 1\n"valid_3.png", 1, 0, 0\n' + ) text_file.close() test_csv = os.path.join(tmpdir, 'some_dataset', 'test.csv') text_file = open(test_csv, 'w') - text_file.write('my_id, label_a, label_b, label_c\n"test_1.png", 0, 1, 0\n' - '"test_2.png", 0, 0, 1\n"test_3.png", 1, 0, 0\n') + text_file.write( + 'my_id, label_a, label_b, label_c\n"test_1.png", 0, 1, 0\n"test_2.png", 0, 0, 1\n"test_3.png", 1, 0, 0\n' + ) text_file.close() train_labels = labels_from_categorical_csv(train_csv, 'my_id') From bdca16690edc878d226cb1303526da2f3b6e0d6f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:11:31 -0500 Subject: [PATCH 05/14] added .csv image loading utils --- flash/vision/classification/data.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/flash/vision/classification/data.py b/flash/vision/classification/data.py index 8d30263c8b..4af59a8764 100644 --- a/flash/vision/classification/data.py +++ b/flash/vision/classification/data.py @@ -345,11 +345,6 @@ def from_filepaths( num_workers=num_workers, ) - @classmethod - def __auto_generate_labels_from_csv(cls, csv): - df = pd.read_csv(csv) - print('a') - @classmethod def from_folders( cls, From 94bd9cddb4510ea910e63565821ae50259d2abdc Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:12:51 -0500 Subject: [PATCH 06/14] added .csv image loading utils --- flash/core/data/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flash/core/data/utils.py b/flash/core/data/utils.py index 3fce2e5073..a497b5f7b4 100644 --- a/flash/core/data/utils.py +++ b/flash/core/data/utils.py @@ -19,7 +19,7 @@ import requests import torch from tqdm.auto import tqdm as tq -import pandas as pd + # Code taken from: https://gist.github.com/ruxi/5d6803c116ec1130d484a4ab8c00c603 # __author__ = "github.com/ruxi" From 11568de38bd79bd2b647be86d6ac32bf90225fc6 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:14:41 -0500 Subject: [PATCH 07/14] added .csv image loading utils --- tests/vision/classification/test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/vision/classification/test_data.py b/tests/vision/classification/test_data.py index c17ffa193e..d047d80b61 100644 --- a/tests/vision/classification/test_data.py +++ b/tests/vision/classification/test_data.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os from pathlib import Path import numpy as np @@ -19,7 +20,6 @@ from torchvision import transforms as T from flash.vision import ImageClassificationData -import os from flash.data import labels_from_categorical_csv From 6a2b0311b040c8e2cc1ebcbf712c5e6e83c16297 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:16:36 -0500 Subject: [PATCH 08/14] added .csv image loading utils --- tests/vision/classification/test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/vision/classification/test_data.py b/tests/vision/classification/test_data.py index d047d80b61..deeb963414 100644 --- a/tests/vision/classification/test_data.py +++ b/tests/vision/classification/test_data.py @@ -19,8 +19,8 @@ from PIL import Image from torchvision import transforms as T -from flash.vision import ImageClassificationData from flash.data import labels_from_categorical_csv +from flash.vision import ImageClassificationData def _dummy_image_loader(filepath): From 5f58b3a2b5b97109025ff020ccd95a453e0f2f26 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:35:25 -0500 Subject: [PATCH 09/14] added .csv image loading utils --- tests/vision/classification/test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/vision/classification/test_data.py b/tests/vision/classification/test_data.py index deeb963414..f05ecab306 100644 --- a/tests/vision/classification/test_data.py +++ b/tests/vision/classification/test_data.py @@ -19,7 +19,7 @@ from PIL import Image from torchvision import transforms as T -from flash.data import labels_from_categorical_csv +from flash.data.data_utils import labels_from_categorical_csv from flash.vision import ImageClassificationData From 7055e407cb44e42c8fa5e4509495f83b11283960 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:40:00 -0500 Subject: [PATCH 10/14] added .csv image loading utils --- .gitignore | 1 - flash/data/__init__.py | 1 + flash/data/data_utils.py | 26 ++++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 flash/data/__init__.py create mode 100644 flash/data/data_utils.py diff --git a/.gitignore b/.gitignore index 679add61c6..943abcb9bb 100644 --- a/.gitignore +++ b/.gitignore @@ -137,6 +137,5 @@ docs/api/ titanic.csv .vscode data_folder -data *.pt *.zip diff --git a/flash/data/__init__.py b/flash/data/__init__.py new file mode 100644 index 0000000000..9efb1c789b --- /dev/null +++ b/flash/data/__init__.py @@ -0,0 +1 @@ +from flash.data.data_utils import labels_from_categorical_csv diff --git a/flash/data/data_utils.py b/flash/data/data_utils.py new file mode 100644 index 0000000000..f147d98764 --- /dev/null +++ b/flash/data/data_utils.py @@ -0,0 +1,26 @@ +import pandas as pd + + +def labels_from_categorical_csv(csv, index_col, return_dict=True): + """ + Returns a dictionary with {index_col: label} for each entry in the csv. + + Expects a csv of this form: + + index_col, b, c, d + some_name, 0 0 1 + some_name_b, 1 0 0 + + """ + df = pd.read_csv(csv) + # get names + names = df[index_col].to_list() + del df[index_col] + + # everything else is binary + labels = df.to_numpy().argmax(1).tolist() + + if return_dict: + labels = {name: label for name, label in zip(names, labels)} + + return labels From 785440133678f641b9af4c4e923fd9786490dca4 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:45:10 -0500 Subject: [PATCH 11/14] added .csv image loading utils --- flash/data/data_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flash/data/data_utils.py b/flash/data/data_utils.py index f147d98764..0eb9f2547d 100644 --- a/flash/data/data_utils.py +++ b/flash/data/data_utils.py @@ -1,7 +1,9 @@ +from typing import Dict, List, Union + import pandas as pd -def labels_from_categorical_csv(csv, index_col, return_dict=True): +def labels_from_categorical_csv(csv, index_col, return_dict=True) -> Union[Dict, List]: """ Returns a dictionary with {index_col: label} for each entry in the csv. From 45dc467b4afeada0d3820c37a0e3debe6c3cf4eb Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:49:04 -0500 Subject: [PATCH 12/14] added .csv image loading utils --- flash/data/data_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flash/data/data_utils.py b/flash/data/data_utils.py index 0eb9f2547d..5f666e8c3e 100644 --- a/flash/data/data_utils.py +++ b/flash/data/data_utils.py @@ -3,7 +3,7 @@ import pandas as pd -def labels_from_categorical_csv(csv, index_col, return_dict=True) -> Union[Dict, List]: +def labels_from_categorical_csv(csv: str, index_col: str, return_dict: dict =True) -> Union[Dict, List]: """ Returns a dictionary with {index_col: label} for each entry in the csv. From 70b42a99e7c1fc2bbbd23d597b5088b7eeaf382e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:49:14 -0500 Subject: [PATCH 13/14] added .csv image loading utils --- flash/data/data_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flash/data/data_utils.py b/flash/data/data_utils.py index 5f666e8c3e..0529884b34 100644 --- a/flash/data/data_utils.py +++ b/flash/data/data_utils.py @@ -3,7 +3,7 @@ import pandas as pd -def labels_from_categorical_csv(csv: str, index_col: str, return_dict: dict =True) -> Union[Dict, List]: +def labels_from_categorical_csv(csv: str, index_col: str, return_dict: dict = True) -> Union[Dict, List]: """ Returns a dictionary with {index_col: label} for each entry in the csv. From 3a07e7d367cadf22be7684850ec97c6f5067e1c3 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 13 Feb 2021 20:58:54 -0500 Subject: [PATCH 14/14] added .csv image loading utils --- flash/data/data_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flash/data/data_utils.py b/flash/data/data_utils.py index 0529884b34..50e6b3bf63 100644 --- a/flash/data/data_utils.py +++ b/flash/data/data_utils.py @@ -3,7 +3,7 @@ import pandas as pd -def labels_from_categorical_csv(csv: str, index_col: str, return_dict: dict = True) -> Union[Dict, List]: +def labels_from_categorical_csv(csv: str, index_col: str, return_dict: bool = True) -> Union[Dict, List]: """ Returns a dictionary with {index_col: label} for each entry in the csv.