From a5542e38b615bb04df0f643cb13efb7b24e4aceb Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Fri, 12 Feb 2021 14:09:08 +0530 Subject: [PATCH 01/13] set inputs as optional --- flash/tabular/classification/data/data.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/flash/tabular/classification/data/data.py b/flash/tabular/classification/data/data.py index fdb566ac20..8211639c8c 100644 --- a/flash/tabular/classification/data/data.py +++ b/flash/tabular/classification/data/data.py @@ -71,9 +71,9 @@ class TabularData(DataModule): def __init__( self, train_df: DataFrame, - categorical_input: List, - numerical_input: List, target: str, + categorical_input: Optional[List] = None, + numerical_input: Optional[List] = None, valid_df: Optional[DataFrame] = None, test_df: Optional[DataFrame] = None, batch_size: int = 2, @@ -82,6 +82,15 @@ def __init__( dfs = [train_df] self._test_df = None + if categorical_input is None and numerical_input is None: + raise TypeError('Both categorical_input and numerical_input are None!') + + if categorical_input is None: + categorical_input = [] + + if numerical_input is None: + numerical_input = [] + if valid_df is not None: dfs.append(valid_df) From ed13008fa70419dd3e385aaa590bab16332a315e Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Fri, 12 Feb 2021 14:14:08 +0530 Subject: [PATCH 02/13] from_csv and from_df args are optional --- flash/tabular/classification/data/data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flash/tabular/classification/data/data.py b/flash/tabular/classification/data/data.py index 8211639c8c..86bb409766 100644 --- a/flash/tabular/classification/data/data.py +++ b/flash/tabular/classification/data/data.py @@ -142,8 +142,8 @@ def from_df( cls, train_df: DataFrame, target: str, - categorical_input: List, - numerical_input: List, + categorical_input: Optional[List] = None, + numerical_input: Optional[List] = None, valid_df: Optional[DataFrame] = None, 
test_df: Optional[DataFrame] = None, batch_size: int = 8, @@ -203,8 +203,8 @@ def from_csv( cls, train_csv: str, target: str, - categorical_input: List, - numerical_input: List, + categorical_input: Optional[List] = None, + numerical_input: Optional[List] = None, valid_csv: Optional[str] = None, test_csv: Optional[str] = None, batch_size: int = 8, From 39623bdaf9ea376a12b1a1fa1ff3a0f84e979179 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 12 Feb 2021 10:17:59 +0100 Subject: [PATCH 03/13] Apply suggestions from code review --- flash/tabular/classification/data/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flash/tabular/classification/data/data.py b/flash/tabular/classification/data/data.py index 86bb409766..2a22af307f 100644 --- a/flash/tabular/classification/data/data.py +++ b/flash/tabular/classification/data/data.py @@ -83,7 +83,7 @@ def __init__( self._test_df = None if categorical_input is None and numerical_input is None: - raise TypeError('Both categorical_input and numerical_input are None!') + raise RuntimeError('Both `categorical_input` and `numerical_input` are None!') if categorical_input is None: categorical_input = [] From 96bc0eca427d14d8131f785a29b31e177ddae16b Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Fri, 12 Feb 2021 18:31:04 +0530 Subject: [PATCH 04/13] converted None to empty tuple --- flash/tabular/classification/data/data.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/flash/tabular/classification/data/data.py b/flash/tabular/classification/data/data.py index 86bb409766..02df7cfe25 100644 --- a/flash/tabular/classification/data/data.py +++ b/flash/tabular/classification/data/data.py @@ -72,8 +72,8 @@ def __init__( self, train_df: DataFrame, target: str, - categorical_input: Optional[List] = None, - numerical_input: Optional[List] = None, + categorical_input: List, + numerical_input: List, valid_df: Optional[DataFrame] = None, test_df: Optional[DataFrame] 
= None, batch_size: int = 2, @@ -82,14 +82,8 @@ def __init__( dfs = [train_df] self._test_df = None - if categorical_input is None and numerical_input is None: - raise TypeError('Both categorical_input and numerical_input are None!') - - if categorical_input is None: - categorical_input = [] - - if numerical_input is None: - numerical_input = [] + if not categorical_input and not numerical_input: + raise ValueError('Both categorical_input and numerical_input are empty!') if valid_df is not None: dfs.append(valid_df) @@ -142,8 +136,8 @@ def from_df( cls, train_df: DataFrame, target: str, - categorical_input: Optional[List] = None, - numerical_input: Optional[List] = None, + categorical_input: List = (), + numerical_input: List = (), valid_df: Optional[DataFrame] = None, test_df: Optional[DataFrame] = None, batch_size: int = 8, @@ -203,8 +197,8 @@ def from_csv( cls, train_csv: str, target: str, - categorical_input: Optional[List] = None, - numerical_input: Optional[List] = None, + categorical_input: List = (), + numerical_input: List = (), valid_csv: Optional[str] = None, test_csv: Optional[str] = None, batch_size: int = 8, From cadaa8b48aa4541903f8ab8a6c9336466ba1848a Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Fri, 12 Feb 2021 18:40:03 +0530 Subject: [PATCH 05/13] test for TabularData empty inputs --- tests/tabular/data/test_data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/tabular/data/test_data.py b/tests/tabular/data/test_data.py index de0878589b..4e4abb91f4 100644 --- a/tests/tabular/data/test_data.py +++ b/tests/tabular/data/test_data.py @@ -171,16 +171,16 @@ def test_from_csv(tmpdir): assert num.shape == (1, 2) assert target.shape == (1, ) -def test_non_existent_backbone(): - with pytest.raises(ValueError): + +def test_empty_inputs(): + with pytest.raises(RuntimeError): dm = TabularData.from_df( train_df, - categorical_input=["category"], - numerical_input=["scalar_b", "scalar_b"], + categorical_input=[], + 
numerical_input=[], target="label", valid_df=valid_df, test_df=test_df, num_workers=0, batch_size=1 ) - ImageClassifier(2, "i am never going to implement this lol") \ No newline at end of file From df29656756c2342f03997bfa0bbef8efe24e9bb8 Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Fri, 12 Feb 2021 18:45:27 +0530 Subject: [PATCH 06/13] fixed pep --- tests/tabular/data/test_data.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/tabular/data/test_data.py b/tests/tabular/data/test_data.py index 4e4abb91f4..925c79698d 100644 --- a/tests/tabular/data/test_data.py +++ b/tests/tabular/data/test_data.py @@ -173,8 +173,11 @@ def test_from_csv(tmpdir): def test_empty_inputs(): + train_df = TEST_DF_1.copy() + valid_df = TEST_DF_2.copy() + test_df = TEST_DF_2.copy() with pytest.raises(RuntimeError): - dm = TabularData.from_df( + TabularData.from_df( train_df, categorical_input=[], numerical_input=[], From 3f06322d8f2488be32018795a04a1e3c95a64fda Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Fri, 12 Feb 2021 18:48:37 +0530 Subject: [PATCH 07/13] isort fixed --- tests/tabular/data/test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tabular/data/test_data.py b/tests/tabular/data/test_data.py index 925c79698d..c0e2a798be 100644 --- a/tests/tabular/data/test_data.py +++ b/tests/tabular/data/test_data.py @@ -11,12 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import pytest from pathlib import Path from unittest.mock import Mock import numpy as np import pandas as pd +import pytest from flash.tabular import TabularData from flash.tabular.classification.data.dataset import _categorize, _normalize From 84b8010acebaa5d8673cc4ec666ec2f8d672ab53 Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Fri, 12 Feb 2021 19:42:39 +0530 Subject: [PATCH 08/13] raise error cond --- flash/tabular/classification/data/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flash/tabular/classification/data/data.py b/flash/tabular/classification/data/data.py index ea848e87a3..30ec28e014 100644 --- a/flash/tabular/classification/data/data.py +++ b/flash/tabular/classification/data/data.py @@ -82,7 +82,7 @@ def __init__( dfs = [train_df] self._test_df = None - if categorical_input is None and numerical_input is None: + if not categorical_input and not numerical_input: raise RuntimeError('Both `categorical_input` and `numerical_input` are None!') if categorical_input is None: From 71ab8a849e0e065440a2fee1899b286e5bb0ba49 Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Fri, 12 Feb 2021 19:44:00 +0530 Subject: [PATCH 09/13] updated test --- tests/tabular/data/test_data.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/tabular/data/test_data.py b/tests/tabular/data/test_data.py index c0e2a798be..9e93c5fc5b 100644 --- a/tests/tabular/data/test_data.py +++ b/tests/tabular/data/test_data.py @@ -174,16 +174,12 @@ def test_from_csv(tmpdir): def test_empty_inputs(): train_df = TEST_DF_1.copy() - valid_df = TEST_DF_2.copy() - test_df = TEST_DF_2.copy() with pytest.raises(RuntimeError): TabularData.from_df( train_df, categorical_input=[], numerical_input=[], target="label", - valid_df=valid_df, - test_df=test_df, num_workers=0, batch_size=1 ) From fe8d9d0cc5913c1ada9f020e9849372ef94950e9 Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Fri, 12 Feb 2021 19:50:20 +0530 Subject: [PATCH 10/13] yapf formatted --- 
tests/tabular/data/test_data.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/tabular/data/test_data.py b/tests/tabular/data/test_data.py index 9e93c5fc5b..fa9db33145 100644 --- a/tests/tabular/data/test_data.py +++ b/tests/tabular/data/test_data.py @@ -176,10 +176,5 @@ def test_empty_inputs(): train_df = TEST_DF_1.copy() with pytest.raises(RuntimeError): TabularData.from_df( - train_df, - categorical_input=[], - numerical_input=[], - target="label", - num_workers=0, - batch_size=1 + train_df, categorical_input=[], numerical_input=[], target="label", num_workers=0, batch_size=1 ) From 101cee1ee70fa1987b39f1e6d4c14427f8219571 Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Sat, 13 Feb 2021 02:58:55 +0530 Subject: [PATCH 11/13] empty tuple to None --- flash/tabular/classification/data/data.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/flash/tabular/classification/data/data.py b/flash/tabular/classification/data/data.py index 30ec28e014..2a22af307f 100644 --- a/flash/tabular/classification/data/data.py +++ b/flash/tabular/classification/data/data.py @@ -72,8 +72,8 @@ def __init__( self, train_df: DataFrame, target: str, - categorical_input: List, - numerical_input: List, + categorical_input: Optional[List] = None, + numerical_input: Optional[List] = None, valid_df: Optional[DataFrame] = None, test_df: Optional[DataFrame] = None, batch_size: int = 2, @@ -82,7 +82,7 @@ def __init__( dfs = [train_df] self._test_df = None - if not categorical_input and not numerical_input: + if categorical_input is None and numerical_input is None: raise RuntimeError('Both `categorical_input` and `numerical_input` are None!') if categorical_input is None: @@ -142,8 +142,8 @@ def from_df( cls, train_df: DataFrame, target: str, - categorical_input: List = (), - numerical_input: List = (), + categorical_input: Optional[List] = None, + numerical_input: Optional[List] = None, valid_df: Optional[DataFrame] = None, test_df: 
Optional[DataFrame] = None, batch_size: int = 8, @@ -203,8 +203,8 @@ def from_csv( cls, train_csv: str, target: str, - categorical_input: List = (), - numerical_input: List = (), + categorical_input: Optional[List] = None, + numerical_input: Optional[List] = None, valid_csv: Optional[str] = None, test_csv: Optional[str] = None, batch_size: int = 8, From 2f8e71689b36865bb3648517e6fd4d5db59b3ff2 Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Sat, 13 Feb 2021 03:12:58 +0530 Subject: [PATCH 12/13] empty tuple to None --- tests/tabular/data/test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tabular/data/test_data.py b/tests/tabular/data/test_data.py index fa9db33145..7ddbfeb5ea 100644 --- a/tests/tabular/data/test_data.py +++ b/tests/tabular/data/test_data.py @@ -176,5 +176,5 @@ def test_empty_inputs(): train_df = TEST_DF_1.copy() with pytest.raises(RuntimeError): TabularData.from_df( - train_df, categorical_input=[], numerical_input=[], target="label", num_workers=0, batch_size=1 + train_df, categorical_input=None, numerical_input=None, target="label", num_workers=0, batch_size=1 ) From 8d03c0d9842dd9fcb0fac48074eb847057ab8efe Mon Sep 17 00:00:00 2001 From: Kaushik Bokka Date: Sat, 13 Feb 2021 12:48:26 +0530 Subject: [PATCH 13/13] minor enhancements --- flash/tabular/classification/data/data.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/flash/tabular/classification/data/data.py b/flash/tabular/classification/data/data.py index 2a22af307f..8d9977af22 100644 --- a/flash/tabular/classification/data/data.py +++ b/flash/tabular/classification/data/data.py @@ -85,11 +85,8 @@ def __init__( if categorical_input is None and numerical_input is None: raise RuntimeError('Both `categorical_input` and `numerical_input` are None!') - if categorical_input is None: - categorical_input = [] - - if numerical_input is None: - numerical_input = [] + categorical_input = categorical_input if categorical_input is not None 
else [] + numerical_input = numerical_input if numerical_input is not None else [] if valid_df is not None: dfs.append(valid_df)