This repository has been archived by the owner on Oct 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 212
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Tabular regression task and example (#892)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ethan Harris <[email protected]> Co-authored-by: Ethan Harris <[email protected]>
- Loading branch information
1 parent
d0adc61
commit ba38014
Showing
11 changed files
with
242 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Copyright The PyTorch Lightning team. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
from typing import Callable, Mapping, Optional, Sequence, Union | ||
|
||
import torch | ||
import torch.nn.functional as F | ||
import torchmetrics | ||
|
||
from flash.core.data.process import Serializer | ||
from flash.core.model import Task | ||
|
||
|
||
class RegressionMixin:
    """Mixin supplying regression defaults for the loss function and metrics."""

    @staticmethod
    def _build(
        loss_fn: Optional[Callable] = None,
        metrics: Union[torchmetrics.Metric, Mapping, Sequence, None] = None,
    ):
        """Fill in regression defaults for any falsy argument.

        Args:
            loss_fn: Loss callable; replaced by ``F.mse_loss`` when falsy.
            metrics: Metric(s); replaced by a fresh ``torchmetrics.MeanSquaredError`` when falsy.

        Returns:
            A ``(metrics, loss_fn)`` tuple. Note that the order is the reverse of the parameter order.
        """
        # Metrics are resolved first, then the loss, each falling back only when the given value is falsy.
        if not metrics:
            metrics = torchmetrics.MeanSquaredError()
        if not loss_fn:
            loss_fn = F.mse_loss
        return metrics, loss_fn

    def to_metrics_format(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` unchanged; no conversion is applied before metrics are computed."""
        return x
|
||
|
||
class RegressionTask(Task, RegressionMixin):
    """A :class:`Task` whose loss function and metrics fall back to regression defaults."""

    def __init__(
        self,
        *args,
        loss_fn: Optional[Callable] = None,
        metrics: Union[torchmetrics.Metric, Mapping, Sequence, None] = None,
        serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None,
        **kwargs,
    ) -> None:
        """Resolve the regression defaults, then delegate to the parent initializer.

        Args:
            *args: Positional arguments forwarded unchanged to the parent ``__init__``.
            loss_fn: Loss callable; the mixin's default is used when falsy.
            metrics: Metric(s); the mixin's default is used when falsy.
            serializer: Forwarded unchanged to the parent ``__init__``.
            **kwargs: Extra keyword arguments forwarded unchanged to the parent ``__init__``.
        """
        # ``_build`` returns the pair as (metrics, loss_fn), i.e. reversed relative to its parameters.
        metrics, loss_fn = RegressionMixin._build(loss_fn=loss_fn, metrics=metrics)

        super().__init__(*args, loss_fn=loss_fn, metrics=metrics, serializer=serializer, **kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from flash.tabular.regression.data import TabularRegressionData # noqa: F401 | ||
from flash.tabular.regression.model import TabularRegressor # noqa: F401 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# Copyright The PyTorch Lightning team. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
from typing import Any, Callable, List, Tuple | ||
|
||
import torch | ||
from torch.nn import functional as F | ||
|
||
from flash.core.data.data_source import DefaultDataKeys | ||
from flash.core.regression import RegressionTask | ||
from flash.core.utilities.imports import _TABULAR_AVAILABLE | ||
from flash.core.utilities.types import LR_SCHEDULER_TYPE, METRICS_TYPE, OPTIMIZER_TYPE, SERIALIZER_TYPE | ||
|
||
if _TABULAR_AVAILABLE: | ||
from pytorch_tabnet.tab_network import TabNet | ||
|
||
|
||
class TabularRegressor(RegressionTask):
    """The ``TabularRegressor`` is a :class:`~flash.Task` for regression on tabular data.

    Args:
        num_features: Number of columns in table (not including target column).
        embedding_sizes: List of (num_classes, emb_dim) to form categorical embeddings. ``None`` or an empty list
            means that no categorical embeddings are created.
        loss_fn: Loss function for training, defaults to mean squared error (``F.mse_loss``).
        optimizer: Optimizer to use for training.
        lr_scheduler: The LR scheduler to use during training.
        metrics: Metrics to compute for training and evaluation. Can either be a metric from the `torchmetrics`
            package, a custom metric inheriting from `torchmetrics.Metric`, a callable function or a list/dict
            containing a combination of the aforementioned. In all cases, each metric needs to have the signature
            `metric(preds,target)` and return a single scalar tensor.
        learning_rate: Learning rate to use for training.
        serializer: The :class:`~flash.core.data.process.Serializer` to use when serializing prediction outputs.
        **tabnet_kwargs: Optional additional arguments for the TabNet model, see
            `pytorch_tabnet <https://dreamquark-ai.github.io/tabnet/_modules/pytorch_tabnet/tab_network.html#TabNet>`_.
    """

    required_extras: str = "tabular"

    def __init__(
        self,
        num_features: int,
        embedding_sizes: List[Tuple[int, int]] = None,
        loss_fn: Callable = F.mse_loss,
        optimizer: OPTIMIZER_TYPE = "Adam",
        lr_scheduler: LR_SCHEDULER_TYPE = None,
        metrics: METRICS_TYPE = None,
        learning_rate: float = 1e-2,
        serializer: SERIALIZER_TYPE = None,
        **tabnet_kwargs,
    ):
        self.save_hyperparameters()

        # Transpose [(num_classes, emb_dim), ...] into two parallel sequences; with no
        # embeddings (``None`` or empty) both fall back to empty lists.
        cat_dims, cat_emb_dim = zip(*embedding_sizes) if embedding_sizes else ([], [])
        model = TabNet(
            input_dim=num_features,
            output_dim=1,
            # ``forward`` concatenates the categorical tensor before the numerical one, so the
            # categorical columns occupy the leading indices. The count is taken from ``cat_dims``
            # rather than ``embedding_sizes`` so that the default ``embedding_sizes=None`` works.
            cat_idxs=list(range(len(cat_dims))),
            cat_dims=list(cat_dims),
            cat_emb_dim=list(cat_emb_dim),
            **tabnet_kwargs,
        )

        super().__init__(
            model=model,
            loss_fn=loss_fn,
            optimizer=optimizer,
            lr_scheduler=lr_scheduler,
            metrics=metrics,
            learning_rate=learning_rate,
            serializer=serializer,
        )

        self.save_hyperparameters()

    def forward(self, x_in) -> torch.Tensor:
        """Concatenate the non-empty input tensors and return the flattened first model output."""
        # TabNet takes single input, x_in is composed of (categorical, numerical)
        # Tensors with zero elements are dropped before concatenating along dim 1.
        xs = [x for x in x_in if x.numel()]
        x = torch.cat(xs, dim=1)
        # The model output is indexed with [0]; only that first element is used here.
        return self.model(x)[0].flatten()

    def training_step(self, batch: Any, batch_idx: int) -> Any:
        """Unpack the batch mapping into an ``(input, target)`` tuple and defer to the parent step."""
        batch = (batch[DefaultDataKeys.INPUT], batch[DefaultDataKeys.TARGET])
        return super().training_step(batch, batch_idx)

    def validation_step(self, batch: Any, batch_idx: int) -> Any:
        """Unpack the batch mapping into an ``(input, target)`` tuple and defer to the parent step."""
        batch = (batch[DefaultDataKeys.INPUT], batch[DefaultDataKeys.TARGET])
        return super().validation_step(batch, batch_idx)

    def test_step(self, batch: Any, batch_idx: int) -> Any:
        """Unpack the batch mapping into an ``(input, target)`` tuple and defer to the parent step."""
        batch = (batch[DefaultDataKeys.INPUT], batch[DefaultDataKeys.TARGET])
        return super().test_step(batch, batch_idx)

    def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> Any:
        """Run ``forward`` on the batch's input entry only; no target is read."""
        batch = batch[DefaultDataKeys.INPUT]
        return self(batch)

    @classmethod
    def from_data(cls, datamodule, **kwargs) -> "TabularRegressor":
        """Build a regressor from a datamodule's ``num_features`` and ``embedding_sizes``.

        Args:
            datamodule: Object exposing ``num_features`` and ``embedding_sizes`` attributes.
            **kwargs: Additional keyword arguments forwarded to the constructor.
        """
        model = cls(datamodule.num_features, datamodule.embedding_sizes, **kwargs)
        return model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Copyright The PyTorch Lightning team. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
import torch | ||
|
||
import flash | ||
from flash.core.data.utils import download_data | ||
from flash.tabular import TabularRegressionData, TabularRegressor | ||
|
||
# 1. Create the DataModule
download_data("https://archive.ics.uci.edu/ml/machine-learning-databases/00560/SeoulBikeData.csv", "./data")

# NOTE(review): the "�" in the two temperature column names looks like a mis-encoded degree
# sign. The names are left untouched because they presumably have to match the CSV header
# exactly as it is read - confirm against the downloaded file before changing them.
datamodule = TabularRegressionData.from_csv(
    categorical_fields=["Seasons", "Holiday", "Functioning Day"],
    # Each numerical column is listed exactly once ("Wind speed (m/s)" was previously duplicated).
    numerical_fields=[
        "Hour",
        "Temperature(�C)",
        "Humidity(%)",
        "Wind speed (m/s)",
        "Visibility (10m)",
        "Dew point temperature(�C)",
        "Solar Radiation (MJ/m2)",
        "Rainfall(mm)",
        "Snowfall (cm)",
    ],
    target_fields="Rented Bike Count",
    train_file="data/SeoulBikeData.csv",
    val_split=0.1,
)

# 2. Build the task
model = TabularRegressor.from_data(datamodule, learning_rate=0.1)

# 3. Create the trainer and train the model
# ``gpus`` is set to the number of visible CUDA devices (0 when none are available).
trainer = flash.Trainer(max_epochs=1, gpus=torch.cuda.device_count())
trainer.fit(model, datamodule=datamodule)

# 4. Generate predictions from a CSV
predictions = model.predict("data/SeoulBikeData.csv")
print(predictions)

# 5. Save the model!
trainer.save_checkpoint("tabular_regression_model.pt")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters