diff --git a/CHANGELOG.md b/CHANGELOG.md index 846f045739..e7238b28bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed +- Fixed a bug that occurred when `embedding_sizes` was not explicitly passed to the `TabularClassifier` and `TabularRegressor` tasks ([#1067](https://github.com/PyTorchLightning/lightning-flash/pull/1067)) + ### Removed ## [0.6.0] - 2021-13-12 diff --git a/flash/tabular/classification/model.py b/flash/tabular/classification/model.py index 392132f4c7..052e59e04a 100644 --- a/flash/tabular/classification/model.py +++ b/flash/tabular/classification/model.py @@ -42,7 +42,8 @@ class TabularClassifier(ClassificationTask): Args: num_features: Number of columns in table (not including target column). num_classes: Number of classes to classify. - embedding_sizes: List of (num_classes, emb_dim) to form categorical embeddings. + embedding_sizes: List of (num_classes, emb_dim) to form categorical embeddings (or ``None`` if there are no + categorical fields in the data). loss_fn: Loss function for training, defaults to cross entropy. optimizer: Optimizer to use for training. lr_scheduler: The LR scheduler to use during training. 
@@ -63,7 +64,7 @@ def __init__( self, num_features: int, num_classes: int, - embedding_sizes: List[Tuple[int, int]] = None, + embedding_sizes: Optional[List[Tuple[int, int]]] = None, loss_fn: Callable = F.cross_entropy, optimizer: OPTIMIZER_TYPE = "Adam", lr_scheduler: LR_SCHEDULER_TYPE = None, @@ -75,7 +76,11 @@ def __init__( ): self.save_hyperparameters() - cat_dims, cat_emb_dim = zip(*embedding_sizes) if embedding_sizes else ([], []) + if embedding_sizes: + cat_dims, cat_emb_dim = zip(*embedding_sizes) + else: + cat_dims, cat_emb_dim, embedding_sizes = [], [], [] + model = TabNet( input_dim=num_features, output_dim=num_classes, diff --git a/flash/tabular/regression/model.py b/flash/tabular/regression/model.py index 18d9fc453f..0ceace0acc 100644 --- a/flash/tabular/regression/model.py +++ b/flash/tabular/regression/model.py @@ -40,7 +40,8 @@ class TabularRegressor(RegressionTask): Args: num_features: Number of columns in table (not including target column). - embedding_sizes: List of (num_classes, emb_dim) to form categorical embeddings. + embedding_sizes: List of (num_classes, emb_dim) to form categorical embeddings (or ``None`` if there are no + categorical fields in the data). loss_fn: Loss function for training, defaults to cross entropy. optimizer: Optimizer to use for training. lr_scheduler: The LR scheduler to use during training. 
@@ -60,7 +61,7 @@ class TabularRegressor(RegressionTask): def __init__( self, num_features: int, - embedding_sizes: List[Tuple[int, int]] = None, + embedding_sizes: Optional[List[Tuple[int, int]]] = None, loss_fn: Callable = F.mse_loss, optimizer: OPTIMIZER_TYPE = "Adam", lr_scheduler: LR_SCHEDULER_TYPE = None, @@ -71,7 +72,11 @@ def __init__( ): self.save_hyperparameters() - cat_dims, cat_emb_dim = zip(*embedding_sizes) if embedding_sizes else ([], []) + if embedding_sizes: + cat_dims, cat_emb_dim = zip(*embedding_sizes) + else: + cat_dims, cat_emb_dim, embedding_sizes = [], [], [] + model = TabNet( input_dim=num_features, output_dim=1, diff --git a/tests/tabular/classification/test_model.py b/tests/tabular/classification/test_model.py index a0af90c924..e411c945e9 100644 --- a/tests/tabular/classification/test_model.py +++ b/tests/tabular/classification/test_model.py @@ -67,7 +67,7 @@ def test_init_train_no_num(tmpdir): @pytest.mark.skipif(not _TABULAR_TESTING, reason="tabular libraries aren't installed.") def test_init_train_no_cat(tmpdir): train_dl = torch.utils.data.DataLoader(DummyDataset(num_cat=0), batch_size=16) - model = TabularClassifier(num_classes=10, num_features=16, embedding_sizes=[]) + model = TabularClassifier(num_classes=10, num_features=16) trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True) trainer.fit(model, train_dl)