diff --git a/docs/apidocs_model.md b/docs/apidocs_model.md index 01d8270f..d0312742 100644 --- a/docs/apidocs_model.md +++ b/docs/apidocs_model.md @@ -30,6 +30,9 @@ ::: pytorch_tabular.models.TabTransformerConfig options: heading_level: 3 +::: pytorch_tabular.models.StackingModelConfig + options: + heading_level: 3 ::: pytorch_tabular.config.ModelConfig options: heading_level: 3 @@ -66,7 +69,9 @@ ::: pytorch_tabular.models.TabTransformerModel options: heading_level: 3 - +::: pytorch_tabular.models.StackingModel + options: + heading_level: 3 ## Base Model Class ::: pytorch_tabular.models.BaseModel options: diff --git a/docs/imgs/model_stacking_concept.png b/docs/imgs/model_stacking_concept.png new file mode 100644 index 00000000..6a0b36fd Binary files /dev/null and b/docs/imgs/model_stacking_concept.png differ diff --git a/docs/models.md b/docs/models.md index 787746a7..928c9acc 100644 --- a/docs/models.md +++ b/docs/models.md @@ -253,6 +253,30 @@ All the parameters have beet set to recommended values from the paper. Let's loo **For a complete list of parameters refer to the API Docs** [pytorch_tabular.models.DANetConfig][] +## Model Stacking + +Model stacking is an ensemble learning technique that combines multiple base models to create a more powerful predictive model. Each base model processes the input features independently, and their outputs are concatenated before making the final prediction. This allows the model to leverage different learning patterns captured by each backbone architecture. You can use it by choosing `StackingModelConfig`. + +The following diagram shows the concept of model stacking in PyTorch Tabular. 
+![Model Stacking](imgs/model_stacking_concept.png) + +The following model architectures are supported for stacking: +- Category Embedding Model +- TabNet Model +- FTTransformer Model +- Gated Additive Tree Ensemble Model +- DANet Model +- AutoInt Model +- GANDALF Model +- Node Model + +All the parameters have been set to provide flexibility while maintaining ease of use. Let's look at them: + +- `model_configs`: List[ModelConfig]: List of configurations for each base model. Each config should be a valid PyTorch Tabular model config (e.g., NodeConfig, GANDALFConfig). + +**For a complete list of parameters refer to the API Docs** +[pytorch_tabular.models.StackingModelConfig][] + ## Implementing New Architectures PyTorch Tabular is very easy to extend and infinitely customizable. All the models that have been implemented in PyTorch Tabular inherits an Abstract Class `BaseModel` which is in fact a PyTorchLightning Model. diff --git a/docs/tutorials/16-Model Stacking.ipynb b/docs/tutorials/16-Model Stacking.ipynb new file mode 100644 index 00000000..4af4092c --- /dev/null +++ b/docs/tutorials/16-Model Stacking.ipynb @@ -0,0 +1,1486 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Model Stacking in PyTorch Tabular\n", + "\n", + "This page demonstrates how to use the model stacking functionality in PyTorch Tabular to combine multiple models for better predictions.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Setup and Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "from pytorch_tabular import TabularModel\n", + "from pytorch_tabular.models import (\n", + "CategoryEmbeddingModelConfig,\n", + 
"FTTransformerConfig,\n", + "TabNetModelConfig\n", + ")\n", + "from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig\n", + "from pytorch_tabular.models.stacking import StackingModelConfig\n", + "from pytorch_tabular.utils import make_mixed_dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create synthetic classification dataset & split into train, validation and test sets" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data, cat_col_names, num_col_names = make_mixed_dataset(\n", + " task=\"classification\", n_samples=3000, n_features=7, n_categories=4\n", + ")\n", + "\n", + "train, test = train_test_split(data, random_state=42)\n", + "train, valid = train_test_split(train, random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Common configurations" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "data_config = DataConfig(\n", + " target=[\"target\"],\n", + " continuous_cols=num_col_names,\n", + " categorical_cols=cat_col_names,\n", + ")\n", + "trainer_config = TrainerConfig(\n", + " batch_size=1024,\n", + " max_epochs=20,\n", + " early_stopping=\"valid_accuracy\",\n", + " early_stopping_mode=\"max\",\n", + " early_stopping_patience=3,\n", + " checkpoints=\"valid_accuracy\",\n", + " load_best=True,\n", + ")\n", + "optimizer_config = OptimizerConfig()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure individual models" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "model_config_1 = CategoryEmbeddingModelConfig(\n", + " task=\"classification\",\n", + " layers=\"128-64-32\",\n", + " activation=\"ReLU\",\n", + " learning_rate=1e-3\n", + ")\n", + "model_config_2 = FTTransformerConfig(\n", + " task=\"classification\",\n", + " 
input_embed_dim=32,\n", + " num_attn_blocks=2,\n", + " num_heads=4,\n", + " learning_rate=1e-3\n", + ")\n", + "model_config_3 = TabNetModelConfig(\n", + " task=\"classification\",\n", + " n_d=8,\n", + " n_a=8,\n", + " n_steps=3,\n", + " learning_rate=1e-3\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Configure Stacking Model\n", + "\n", + "Now let's set up the stacking configuration that will combine these models:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "stacking_config = StackingModelConfig(\n", + " task=\"classification\",\n", + " model_configs=[\n", + " model_config_1,\n", + " model_config_2,\n", + " model_config_3\n", + " ],\n", + " head=\"LinearHead\",\n", + " head_config={\n", + " \"layers\": \"64\",\n", + " \"activation\": \"ReLU\",\n", + " \"dropout\": 0.1\n", + " },\n", + " learning_rate=1e-3\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Train Stacking Model" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,338 - {pytorch_tabular.tabular_model:147} - INFO - Experiment Tracking is turned off \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m338\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m147\u001b[0m\u001b[1m}\u001b[0m - INFO - Experiment Tracking is turned off \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 42\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,388 - {pytorch_tabular.tabular_model:549} - INFO - Preparing the DataLoaders \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m388\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m549\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the DataLoaders \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,394 - {pytorch_tabular.tabular_datamodule:527} - INFO - Setting up the datamodule for \n", + "classification task \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m394\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_datamodul\u001b[1;92me:527\u001b[0m\u001b[1m}\u001b[0m - INFO - Setting up the datamodule for \n", + "classification task \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,462 - {pytorch_tabular.tabular_model:600} - INFO - Preparing the Model: StackingModel \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m462\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m600\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Model: StackingModel \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,516 - {pytorch_tabular.tabular_model:343} - INFO - Preparing the Trainer \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m516\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m343\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Trainer \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,813 - {pytorch_tabular.tabular_model:679} - INFO - Training Started \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m813\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m679\u001b[0m\u001b[1m}\u001b[0m - INFO - Training Started \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃ ┃ Name ┃ Type ┃ Params ┃ Mode ┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│ 0 │ _backbone │ StackingBackbone │ 77.2 K │ train │\n", + "│ 1 │ _embedding_layer │ StackingEmbeddingLayer │ 917 │ train │\n", + "│ 2 │ _head │ LinearHead │ 12.5 K │ train │\n", + "│ 3 │ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴────────────────────────┴────────┴───────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mName \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mType \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mParams\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mMode \u001b[0m\u001b[1;35m \u001b[0m┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│\u001b[2m \u001b[0m\u001b[2m0\u001b[0m\u001b[2m \u001b[0m│ _backbone │ StackingBackbone │ 77.2 K │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m1\u001b[0m\u001b[2m \u001b[0m│ _embedding_layer │ StackingEmbeddingLayer │ 917 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m2\u001b[0m\u001b[2m \u001b[0m│ _head │ LinearHead │ 12.5 K │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m3\u001b[0m\u001b[2m \u001b[0m│ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴────────────────────────┴────────┴───────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Trainable params: 90.6 K \n", + "Non-trainable params: 0 \n", + "Total params: 90.6 K \n", + "Total estimated model params size (MB): 0 \n", + "Modules in train mode: 188 \n", + "Modules in eval mode: 0 \n", + "\n" + ], + "text/plain": [ + "\u001b[1mTrainable params\u001b[0m: 90.6 K \n", + "\u001b[1mNon-trainable params\u001b[0m: 0 \n", + "\u001b[1mTotal params\u001b[0m: 90.6 K \n", + "\u001b[1mTotal estimated model params size (MB)\u001b[0m: 0 \n", + "\u001b[1mModules in train mode\u001b[0m: 188 \n", + "\u001b[1mModules in eval mode\u001b[0m: 0 \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3cd6f3938b1f419c8b07eb89ffa13bf4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:39,304 - {pytorch_tabular.tabular_model:692} - INFO - Training the model completed \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:39\u001b[0m,\u001b[1;36m304\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m692\u001b[0m\u001b[1m}\u001b[0m - INFO - Training the model completed \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:39,307 - {pytorch_tabular.tabular_model:1533} - INFO - Loading the best model \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:39\u001b[0m,\u001b[1;36m307\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m1533\u001b[0m\u001b[1m}\u001b[0m - INFO - Loading the best model \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric ┃ DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test_accuracy │ 0.5960000157356262 │\n", + "│ test_loss │ 0.7419928312301636 │\n", + "│ test_loss_0 │ 0.7419928312301636 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test_accuracy \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.5960000157356262 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.7419928312301636 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss_0 \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.7419928312301636 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "predictions = stacking_model.predict(test)\n", + "stacking_metrics = stacking_model.evaluate(test)[0]\n", + "stacking_acc = stacking_metrics[\"test_accuracy\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compare with individual models" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "def train_and_evaluate_model(model_config, name):\n", + " model = TabularModel(\n", + " data_config=data_config,\n", + " model_config=model_config,\n", + " 
optimizer_config=optimizer_config,\n", + " trainer_config=trainer_config,\n", + " )\n", + " model.fit(train=train, validation=valid)\n", + " metrics = model.evaluate(test)\n", + " print(f\"\\n{name} Metrics:\")\n", + " print(metrics)\n", + " return metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
2024-12-12 00:09:01,257 - {pytorch_tabular.tabular_model:147} - INFO - Experiment Tracking is turned off \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m257\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m147\u001b[0m\u001b[1m}\u001b[0m - INFO - Experiment Tracking is turned off \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 42\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:01,320 - {pytorch_tabular.tabular_model:549} - INFO - Preparing the DataLoaders \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m320\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m549\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the DataLoaders \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:01,340 - {pytorch_tabular.tabular_datamodule:527} - INFO - Setting up the datamodule for \n", + "classification task \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m340\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_datamodul\u001b[1;92me:527\u001b[0m\u001b[1m}\u001b[0m - INFO - Setting up the datamodule for \n", + "classification task \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:01,376 - {pytorch_tabular.tabular_model:600} - INFO - Preparing the Model: CategoryEmbeddingModel \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m376\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m600\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Model: CategoryEmbeddingModel \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:01,411 - {pytorch_tabular.tabular_model:343} - INFO - Preparing the Trainer \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m411\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m343\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Trainer \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:01,638 - {pytorch_tabular.tabular_model:679} - INFO - Training Started \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m638\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m679\u001b[0m\u001b[1m}\u001b[0m - INFO - Training Started \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃ ┃ Name ┃ Type ┃ Params ┃ Mode ┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│ 0 │ _backbone │ CategoryEmbeddingBackbone │ 12.1 K │ train │\n", + "│ 1 │ _embedding_layer │ Embedding1dLayer │ 53 │ train │\n", + "│ 2 │ head │ LinearHead │ 66 │ train │\n", + "│ 3 │ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴───────────────────────────┴────────┴───────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mName \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mType \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mParams\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mMode \u001b[0m\u001b[1;35m \u001b[0m┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│\u001b[2m \u001b[0m\u001b[2m0\u001b[0m\u001b[2m \u001b[0m│ _backbone │ CategoryEmbeddingBackbone │ 12.1 K │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m1\u001b[0m\u001b[2m \u001b[0m│ _embedding_layer │ Embedding1dLayer │ 53 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m2\u001b[0m\u001b[2m \u001b[0m│ head │ LinearHead │ 66 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m3\u001b[0m\u001b[2m \u001b[0m│ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴───────────────────────────┴────────┴───────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Trainable params: 12.2 K \n", + "Non-trainable params: 0 \n", + "Total params: 12.2 K \n", + "Total estimated model params size (MB): 0 \n", + "Modules in train mode: 19 \n", + "Modules in eval mode: 0 \n", + "\n" + ], + "text/plain": [ + "\u001b[1mTrainable params\u001b[0m: 12.2 K \n", + "\u001b[1mNon-trainable params\u001b[0m: 0 \n", + "\u001b[1mTotal params\u001b[0m: 12.2 K \n", + "\u001b[1mTotal estimated model params size (MB)\u001b[0m: 0 \n", + "\u001b[1mModules in train mode\u001b[0m: 19 \n", + "\u001b[1mModules in eval mode\u001b[0m: 0 \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "03ed36b48da24bb19f036d1db4422cb7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=20` reached.\n" + ] + }, + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:04,935 - {pytorch_tabular.tabular_model:692} - INFO - Training the model completed \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:04\u001b[0m,\u001b[1;36m935\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m692\u001b[0m\u001b[1m}\u001b[0m - INFO - Training the model completed \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:04,938 - {pytorch_tabular.tabular_model:1533} - INFO - Loading the best model \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:04\u001b[0m,\u001b[1;36m938\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m1533\u001b[0m\u001b[1m}\u001b[0m - INFO - Loading the best model \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bdcb7befb3b340a895a5399394780d7e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric ┃ DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test_accuracy │ 0.4586666524410248 │\n", + "│ test_loss │ 0.8828091025352478 │\n", + "│ test_loss_0 │ 0.8828091025352478 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test_accuracy \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.4586666524410248 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.8828091025352478 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss_0 \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.8828091025352478 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Category Embedding Metrics:\n", + "[{'test_loss_0': 0.8828091025352478, 'test_loss': 0.8828091025352478, 'test_accuracy': 0.4586666524410248}]\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:05,183 - {pytorch_tabular.tabular_model:147} - INFO - Experiment Tracking is turned off \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m183\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m147\u001b[0m\u001b[1m}\u001b[0m - INFO - Experiment Tracking is turned off \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 42\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:05,263 - {pytorch_tabular.tabular_model:549} - INFO - Preparing the DataLoaders \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m263\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m549\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the DataLoaders \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:05,272 - {pytorch_tabular.tabular_datamodule:527} - INFO - Setting up the datamodule for \n", + "classification task \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m272\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_datamodul\u001b[1;92me:527\u001b[0m\u001b[1m}\u001b[0m - INFO - Setting up the datamodule for \n", + "classification task \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:05,294 - {pytorch_tabular.tabular_model:600} - INFO - Preparing the Model: FTTransformerModel \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m294\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m600\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Model: FTTransformerModel \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:05,323 - {pytorch_tabular.tabular_model:343} - INFO - Preparing the Trainer \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m323\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m343\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Trainer \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:05,623 - {pytorch_tabular.tabular_model:679} - INFO - Training Started \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m623\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m679\u001b[0m\u001b[1m}\u001b[0m - INFO - Training Started \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃ ┃ Name ┃ Type ┃ Params ┃ Mode ┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│ 0 │ _backbone │ FTTransformerBackbone │ 57.7 K │ train │\n", + "│ 1 │ _embedding_layer │ Embedding2dLayer │ 864 │ train │\n", + "│ 2 │ _head │ LinearHead │ 66 │ train │\n", + "│ 3 │ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴───────────────────────┴────────┴───────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mName \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mType \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mParams\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mMode \u001b[0m\u001b[1;35m \u001b[0m┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│\u001b[2m \u001b[0m\u001b[2m0\u001b[0m\u001b[2m \u001b[0m│ _backbone │ FTTransformerBackbone │ 57.7 K │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m1\u001b[0m\u001b[2m \u001b[0m│ _embedding_layer │ Embedding2dLayer │ 864 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m2\u001b[0m\u001b[2m \u001b[0m│ _head │ LinearHead │ 66 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m3\u001b[0m\u001b[2m \u001b[0m│ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴───────────────────────┴────────┴───────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Trainable params: 58.6 K \n", + "Non-trainable params: 0 \n", + "Total params: 58.6 K \n", + "Total estimated model params size (MB): 0 \n", + "Modules in train mode: 56 \n", + "Modules in eval mode: 0 \n", + "\n" + ], + "text/plain": [ + "\u001b[1mTrainable params\u001b[0m: 58.6 K \n", + "\u001b[1mNon-trainable params\u001b[0m: 0 \n", + "\u001b[1mTotal params\u001b[0m: 58.6 K \n", + "\u001b[1mTotal estimated model params size (MB)\u001b[0m: 0 \n", + "\u001b[1mModules in train mode\u001b[0m: 56 \n", + "\u001b[1mModules in eval mode\u001b[0m: 0 \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "62184d0ac93049058c153f2e93518d0f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,482 - {pytorch_tabular.tabular_model:692} - INFO - Training the model completed \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m482\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m692\u001b[0m\u001b[1m}\u001b[0m - INFO - Training the model completed \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,488 - {pytorch_tabular.tabular_model:1533} - INFO - Loading the best model \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m488\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m1533\u001b[0m\u001b[1m}\u001b[0m - INFO - Loading the best model \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5a482fb9cd5045e3ada1beac9c114d97", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric ┃ DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test_accuracy │ 0.5546666383743286 │\n", + "│ test_loss │ 0.6846821904182434 │\n", + "│ test_loss_0 │ 0.6846821904182434 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test_accuracy \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.5546666383743286 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.6846821904182434 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss_0 \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.6846821904182434 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "FT Transformer Metrics:\n", + "[{'test_loss_0': 0.6846821904182434, 'test_loss': 0.6846821904182434, 'test_accuracy': 0.5546666383743286}]\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,824 - {pytorch_tabular.tabular_model:147} - INFO - Experiment Tracking is turned off \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m824\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m147\u001b[0m\u001b[1m}\u001b[0m - INFO - Experiment Tracking is turned off \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 42\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,863 - {pytorch_tabular.tabular_model:549} - INFO - Preparing the DataLoaders \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m863\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m549\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the DataLoaders \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,870 - {pytorch_tabular.tabular_datamodule:527} - INFO - Setting up the datamodule for \n", + "classification task \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m870\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_datamodul\u001b[1;92me:527\u001b[0m\u001b[1m}\u001b[0m - INFO - Setting up the datamodule for \n", + "classification task \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,900 - {pytorch_tabular.tabular_model:600} - INFO - Preparing the Model: TabNetModel \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m900\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m600\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Model: TabNetModel \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,965 - {pytorch_tabular.tabular_model:343} - INFO - Preparing the Trainer \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m965\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m343\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Trainer \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:08,200 - {pytorch_tabular.tabular_model:679} - INFO - Training Started \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:08\u001b[0m,\u001b[1;36m200\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m679\u001b[0m\u001b[1m}\u001b[0m - INFO - Training Started \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃ ┃ Name ┃ Type ┃ Params ┃ Mode ┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│ 0 │ _embedding_layer │ Identity │ 0 │ train │\n", + "│ 1 │ _backbone │ TabNetBackbone │ 6.4 K │ train │\n", + "│ 2 │ _head │ Identity │ 0 │ train │\n", + "│ 3 │ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴──────────────────┴────────┴───────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mName \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mType \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mParams\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mMode \u001b[0m\u001b[1;35m \u001b[0m┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│\u001b[2m \u001b[0m\u001b[2m0\u001b[0m\u001b[2m \u001b[0m│ _embedding_layer │ Identity │ 0 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m1\u001b[0m\u001b[2m \u001b[0m│ _backbone │ TabNetBackbone │ 6.4 K │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m2\u001b[0m\u001b[2m \u001b[0m│ _head │ Identity │ 0 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m3\u001b[0m\u001b[2m \u001b[0m│ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴──────────────────┴────────┴───────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Trainable params: 6.4 K \n", + "Non-trainable params: 0 \n", + "Total params: 6.4 K \n", + "Total estimated model params size (MB): 0 \n", + "Modules in train mode: 111 \n", + "Modules in eval mode: 0 \n", + "\n" + ], + "text/plain": [ + "\u001b[1mTrainable params\u001b[0m: 6.4 K \n", + "\u001b[1mNon-trainable params\u001b[0m: 0 \n", + "\u001b[1mTotal params\u001b[0m: 6.4 K \n", + "\u001b[1mTotal estimated model params size (MB)\u001b[0m: 0 \n", + "\u001b[1mModules in train mode\u001b[0m: 111 \n", + "\u001b[1mModules in eval mode\u001b[0m: 0 \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2e27939fc57d4c9585a3252b035e74f8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:09,766 - {pytorch_tabular.tabular_model:692} - INFO - Training the model completed \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:09\u001b[0m,\u001b[1;36m766\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m692\u001b[0m\u001b[1m}\u001b[0m - INFO - Training the model completed \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:09,767 - {pytorch_tabular.tabular_model:1533} - INFO - Loading the best model \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:09\u001b[0m,\u001b[1;36m767\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m1533\u001b[0m\u001b[1m}\u001b[0m - INFO - Loading the best model \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8a98c3a2c4ce4bcaac279982ec86bd8f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃ Test metric ┃ DataLoader 0 ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│ test_accuracy │ 0.4346666634082794 │\n", + "│ test_loss │ 1.1570961475372314 │\n", + "│ test_loss_0 │ 1.1570961475372314 │\n", + "└───────────────────────────┴───────────────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test_accuracy \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.4346666634082794 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.1570961475372314 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss_0 \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.1570961475372314 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "TabNet Metrics:\n", + "[{'test_loss_0': 1.1570961475372314, 'test_loss': 1.1570961475372314, 'test_accuracy': 0.4346666634082794}]\n" + ] + } + ], + "source": [ + "ce_metrics = train_and_evaluate_model(model_config_1, \"Category Embedding\")[0]\n", + "ft_metrics = train_and_evaluate_model(model_config_2, \"FT Transformer\")[0]\n", + "tab_metrics = train_and_evaluate_model(model_config_3, \"TabNet\")[0]\n", + "ce_acc = ce_metrics[\"test_accuracy\"]\n", + "ft_acc = ft_metrics[\"test_accuracy\"]\n", + "tab_acc = 
tab_metrics[\"test_accuracy\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Stacking Model Test Accuracy: 0.5960000157356262\n", + "Category Embedding Model Test Accuracy: 0.4586666524410248\n", + "FT Transformer Model Test Accuracy: 0.5546666383743286\n", + "TabNet Model Test Accuracy: 0.4346666634082794\n" + ] + } + ], + "source": [ + "print(\"Stacking Model Test Accuracy: {}\".format(stacking_acc))\n", + "print(\"Category Embedding Model Test Accuracy: {}\".format(ce_acc))\n", + "print(\"FT Transformer Model Test Accuracy: {}\".format(ft_acc))\n", + "print(\"TabNet Model Test Accuracy: {}\".format(tab_acc))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save the stacking model & load it" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
2024-12-12 00:00:31,524 - {pytorch_tabular.tabular_model:1579} - WARNING - Directory is not empty. Overwriting the \n", + "contents. \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:00:31\u001b[0m,\u001b[1;36m524\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m1579\u001b[0m\u001b[1m}\u001b[0m - WARNING - Directory is not empty. Overwriting the \n", + "contents. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "stacking_model.save_model(\"stacking_model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
2024-12-12 00:00:32,437 - {pytorch_tabular.tabular_model:172} - INFO - Experiment Tracking is turned off \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:00:32\u001b[0m,\u001b[1;36m437\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m172\u001b[0m\u001b[1m}\u001b[0m - INFO - Experiment Tracking is turned off \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:00:32,452 - {pytorch_tabular.tabular_model:343} - INFO - Preparing the Trainer \n", + "\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:00:32\u001b[0m,\u001b[1;36m452\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m343\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Trainer \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Trainer already configured with model summary callbacks: [