diff --git a/docs/apidocs_model.md b/docs/apidocs_model.md index 01d8270f..d0312742 100644 --- a/docs/apidocs_model.md +++ b/docs/apidocs_model.md @@ -30,6 +30,9 @@ ::: pytorch_tabular.models.TabTransformerConfig options: heading_level: 3 +::: pytorch_tabular.models.StackingModelConfig + options: + heading_level: 3 ::: pytorch_tabular.config.ModelConfig options: heading_level: 3 @@ -66,7 +69,9 @@ ::: pytorch_tabular.models.TabTransformerModel options: heading_level: 3 - +::: pytorch_tabular.models.StackingModel + options: + heading_level: 3 ## Base Model Class ::: pytorch_tabular.models.BaseModel options: diff --git a/docs/imgs/model_stacking_concept.png b/docs/imgs/model_stacking_concept.png new file mode 100644 index 00000000..6a0b36fd Binary files /dev/null and b/docs/imgs/model_stacking_concept.png differ diff --git a/docs/models.md b/docs/models.md index 787746a7..928c9acc 100644 --- a/docs/models.md +++ b/docs/models.md @@ -253,6 +253,30 @@ All the parameters have beet set to recommended values from the paper. Let's loo **For a complete list of parameters refer to the API Docs** [pytorch_tabular.models.DANetConfig][] +## Model Stacking + +Model stacking is an ensemble learning technique that combines multiple base models to create a more powerful predictive model. Each base model processes the input features independently, and their outputs are concatenated before making the final prediction. This allows the model to leverage different learning patterns captured by each backbone architecture. You can use it by choosing `StackingModelConfig`. + +The following diagram shows the concept of model stacking in PyTorch Tabular. 
+![Model Stacking](imgs/model_stacking_concept.png) + +The following model architectures are supported for stacking: +- Category Embedding Model +- TabNet Model +- FTTransformer Model +- Gated Additive Tree Ensemble Model +- DANet Model +- AutoInt Model +- GANDALF Model +- Node Model + +All the parameters have been set to provide flexibility while maintaining ease of use. Let's look at them: + +- `model_configs`: List[ModelConfig]: List of configurations for each base model. Each config should be a valid PyTorch Tabular model config (e.g., NodeConfig, GANDALFConfig) + +**For a complete list of parameters refer to the API Docs** +[pytorch_tabular.models.StackingModelConfig][] + ## Implementing New Architectures PyTorch Tabular is very easy to extend and infinitely customizable. All the models that have been implemented in PyTorch Tabular inherits an Abstract Class `BaseModel` which is in fact a PyTorchLightning Model. diff --git a/docs/tutorials/16-Model Stacking.ipynb b/docs/tutorials/16-Model Stacking.ipynb new file mode 100644 index 00000000..4af4092c --- /dev/null +++ b/docs/tutorials/16-Model Stacking.ipynb @@ -0,0 +1,1486 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Model Stacking in PyTorch Tabular\n", + "\n", + "This page demonstrates how to use model stacking functionality in PyTorch Tabular to combine multiple models for better predictions.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Setup and Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "from pytorch_tabular import TabularModel\n", + "from pytorch_tabular.models import (\n", + "CategoryEmbeddingModelConfig,\n", + "FTTransformerConfig,\n", + "TabNetModelConfig\n", + 
")\n", + "from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig\n", + "from pytorch_tabular.models.stacking import StackingModelConfig\n", + "from pytorch_tabular.utils import make_mixed_dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create synthetic classification dataset & split into train, validation and test sets" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data, cat_col_names, num_col_names = make_mixed_dataset(\n", + " task=\"classification\", n_samples=3000, n_features=7, n_categories=4\n", + ")\n", + "\n", + "train, test = train_test_split(data, random_state=42)\n", + "train, valid = train_test_split(train, random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Common configurations" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "data_config = DataConfig(\n", + " target=[\"target\"],\n", + " continuous_cols=num_col_names,\n", + " categorical_cols=cat_col_names,\n", + ")\n", + "trainer_config = TrainerConfig(\n", + " batch_size=1024,\n", + " max_epochs=20,\n", + " early_stopping=\"valid_accuracy\",\n", + " early_stopping_mode=\"max\",\n", + " early_stopping_patience=3,\n", + " checkpoints=\"valid_accuracy\",\n", + " load_best=True,\n", + ")\n", + "optimizer_config = OptimizerConfig()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure individual models" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "model_config_1 = CategoryEmbeddingModelConfig(\n", + " task=\"classification\",\n", + " layers=\"128-64-32\",\n", + " activation=\"ReLU\",\n", + " learning_rate=1e-3\n", + ")\n", + "model_config_2 = FTTransformerConfig(\n", + " task=\"classification\",\n", + " input_embed_dim=32,\n", + " num_attn_blocks=2,\n", + " 
num_heads=4,\n", + " learning_rate=1e-3\n", + ")\n", + "model_config_3 = TabNetModelConfig(\n", + " task=\"classification\",\n", + " n_d=8,\n", + " n_a=8,\n", + " n_steps=3,\n", + " learning_rate=1e-3\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Configure Stacking Model\n", + "\n", + "Now let's set up the stacking configuration that will combine these models:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "stacking_config = StackingModelConfig(\n", + " task=\"classification\",\n", + " model_configs=[\n", + " model_config_1,\n", + " model_config_2,\n", + " model_config_3\n", + " ],\n", + " head=\"LinearHead\",\n", + " head_config={\n", + " \"layers\": \"64\",\n", + " \"activation\": \"ReLU\",\n", + " \"dropout\": 0.1\n", + " },\n", + " learning_rate=1e-3\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Train Stacking Model" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,338 - {pytorch_tabular.tabular_model:147} - INFO - Experiment Tracking is turned off           \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m338\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m147\u001b[0m\u001b[1m}\u001b[0m - INFO - Experiment Tracking is turned off \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 42\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,388 - {pytorch_tabular.tabular_model:549} - INFO - Preparing the DataLoaders                   \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m388\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m549\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the DataLoaders \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,394 - {pytorch_tabular.tabular_datamodule:527} - INFO - Setting up the datamodule for          \n",
+       "classification task                                                                                                \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m394\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_datamodul\u001b[1;92me:527\u001b[0m\u001b[1m}\u001b[0m - INFO - Setting up the datamodule for \n", + "classification task \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,462 - {pytorch_tabular.tabular_model:600} - INFO - Preparing the Model: StackingModel          \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m462\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m600\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Model: StackingModel \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,516 - {pytorch_tabular.tabular_model:343} - INFO - Preparing the Trainer                       \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m516\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m343\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Trainer \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:35,813 - {pytorch_tabular.tabular_model:679} - INFO - Training Started                            \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:35\u001b[0m,\u001b[1;36m813\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m679\u001b[0m\u001b[1m}\u001b[0m - INFO - Training Started \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n",
+       "┃    Name              Type                    Params  Mode  ┃\n",
+       "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n",
+       "│ 0 │ _backbone        │ StackingBackbone       │ 77.2 K │ train │\n",
+       "│ 1 │ _embedding_layer │ StackingEmbeddingLayer │    917 │ train │\n",
+       "│ 2 │ _head            │ LinearHead             │ 12.5 K │ train │\n",
+       "│ 3 │ loss             │ CrossEntropyLoss       │      0 │ train │\n",
+       "└───┴──────────────────┴────────────────────────┴────────┴───────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mName \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mType \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mParams\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mMode \u001b[0m\u001b[1;35m \u001b[0m┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│\u001b[2m \u001b[0m\u001b[2m0\u001b[0m\u001b[2m \u001b[0m│ _backbone │ StackingBackbone │ 77.2 K │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m1\u001b[0m\u001b[2m \u001b[0m│ _embedding_layer │ StackingEmbeddingLayer │ 917 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m2\u001b[0m\u001b[2m \u001b[0m│ _head │ LinearHead │ 12.5 K │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m3\u001b[0m\u001b[2m \u001b[0m│ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴────────────────────────┴────────┴───────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Trainable params: 90.6 K                                                                                           \n",
+       "Non-trainable params: 0                                                                                            \n",
+       "Total params: 90.6 K                                                                                               \n",
+       "Total estimated model params size (MB): 0                                                                          \n",
+       "Modules in train mode: 188                                                                                         \n",
+       "Modules in eval mode: 0                                                                                            \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1mTrainable params\u001b[0m: 90.6 K \n", + "\u001b[1mNon-trainable params\u001b[0m: 0 \n", + "\u001b[1mTotal params\u001b[0m: 90.6 K \n", + "\u001b[1mTotal estimated model params size (MB)\u001b[0m: 0 \n", + "\u001b[1mModules in train mode\u001b[0m: 188 \n", + "\u001b[1mModules in eval mode\u001b[0m: 0 \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3cd6f3938b1f419c8b07eb89ffa13bf4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
2024-12-12 00:02:39,304 - {pytorch_tabular.tabular_model:692} - INFO - Training the model completed                \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:39\u001b[0m,\u001b[1;36m304\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m692\u001b[0m\u001b[1m}\u001b[0m - INFO - Training the model completed \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:02:39,307 - {pytorch_tabular.tabular_model:1533} - INFO - Loading the best model                     \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:02:39\u001b[0m,\u001b[1;36m307\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m1533\u001b[0m\u001b[1m}\u001b[0m - INFO - Loading the best model \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stacking_model = TabularModel(\n", + " data_config=data_config,\n", + " model_config=stacking_config,\n", + " optimizer_config=optimizer_config,\n", + " trainer_config=trainer_config,\n", + ")\n", + "stacking_model.fit(\n", + " train=train,\n", + " validation=valid\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate Results" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b1616690de674da8bbc8cc985f19686a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│       test_accuracy           0.5960000157356262     │\n",
+       "│         test_loss             0.7419928312301636     │\n",
+       "│        test_loss_0            0.7419928312301636     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test_accuracy \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.5960000157356262 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.7419928312301636 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss_0 \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.7419928312301636 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "predictions = stacking_model.predict(test)\n",
+    "stacking_metrics = stacking_model.evaluate(test)[0]\n",
+    "stacking_acc = stacking_metrics[\"test_accuracy\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compare with individual models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_and_evaluate_model(model_config, name):\n",
+    "    model = TabularModel(\n",
+    "        data_config=data_config,\n",
+    "        model_config=model_config,\n",
+    "        optimizer_config=optimizer_config,\n",
+    "        trainer_config=trainer_config,\n",
+    "    )\n",
+    "    model.fit(train=train, validation=valid)\n",
+    "    metrics = model.evaluate(test)\n",
+    "    print(f\"\\n{name} Metrics:\")\n",
+    "    print(metrics)\n",
+    "    return metrics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
2024-12-12 00:09:01,257 - {pytorch_tabular.tabular_model:147} - INFO - Experiment Tracking is turned off           \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m257\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m147\u001b[0m\u001b[1m}\u001b[0m - INFO - Experiment Tracking is turned off \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 42\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:01,320 - {pytorch_tabular.tabular_model:549} - INFO - Preparing the DataLoaders                   \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m320\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m549\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the DataLoaders \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:01,340 - {pytorch_tabular.tabular_datamodule:527} - INFO - Setting up the datamodule for          \n",
+       "classification task                                                                                                \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m340\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_datamodul\u001b[1;92me:527\u001b[0m\u001b[1m}\u001b[0m - INFO - Setting up the datamodule for \n", + "classification task \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:01,376 - {pytorch_tabular.tabular_model:600} - INFO - Preparing the Model: CategoryEmbeddingModel \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m376\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m600\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Model: CategoryEmbeddingModel \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:01,411 - {pytorch_tabular.tabular_model:343} - INFO - Preparing the Trainer                       \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m411\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m343\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Trainer \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:01,638 - {pytorch_tabular.tabular_model:679} - INFO - Training Started                            \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:01\u001b[0m,\u001b[1;36m638\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m679\u001b[0m\u001b[1m}\u001b[0m - INFO - Training Started \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n",
+       "┃    Name              Type                       Params  Mode  ┃\n",
+       "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n",
+       "│ 0 │ _backbone        │ CategoryEmbeddingBackbone │ 12.1 K │ train │\n",
+       "│ 1 │ _embedding_layer │ Embedding1dLayer          │     53 │ train │\n",
+       "│ 2 │ head             │ LinearHead                │     66 │ train │\n",
+       "│ 3 │ loss             │ CrossEntropyLoss          │      0 │ train │\n",
+       "└───┴──────────────────┴───────────────────────────┴────────┴───────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mName \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mType \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mParams\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mMode \u001b[0m\u001b[1;35m \u001b[0m┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│\u001b[2m \u001b[0m\u001b[2m0\u001b[0m\u001b[2m \u001b[0m│ _backbone │ CategoryEmbeddingBackbone │ 12.1 K │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m1\u001b[0m\u001b[2m \u001b[0m│ _embedding_layer │ Embedding1dLayer │ 53 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m2\u001b[0m\u001b[2m \u001b[0m│ head │ LinearHead │ 66 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m3\u001b[0m\u001b[2m \u001b[0m│ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴───────────────────────────┴────────┴───────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Trainable params: 12.2 K                                                                                           \n",
+       "Non-trainable params: 0                                                                                            \n",
+       "Total params: 12.2 K                                                                                               \n",
+       "Total estimated model params size (MB): 0                                                                          \n",
+       "Modules in train mode: 19                                                                                          \n",
+       "Modules in eval mode: 0                                                                                            \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1mTrainable params\u001b[0m: 12.2 K \n", + "\u001b[1mNon-trainable params\u001b[0m: 0 \n", + "\u001b[1mTotal params\u001b[0m: 12.2 K \n", + "\u001b[1mTotal estimated model params size (MB)\u001b[0m: 0 \n", + "\u001b[1mModules in train mode\u001b[0m: 19 \n", + "\u001b[1mModules in eval mode\u001b[0m: 0 \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "03ed36b48da24bb19f036d1db4422cb7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=20` reached.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
2024-12-12 00:09:04,935 - {pytorch_tabular.tabular_model:692} - INFO - Training the model completed                \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:04\u001b[0m,\u001b[1;36m935\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m692\u001b[0m\u001b[1m}\u001b[0m - INFO - Training the model completed \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:04,938 - {pytorch_tabular.tabular_model:1533} - INFO - Loading the best model                     \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:04\u001b[0m,\u001b[1;36m938\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m1533\u001b[0m\u001b[1m}\u001b[0m - INFO - Loading the best model \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bdcb7befb3b340a895a5399394780d7e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│       test_accuracy           0.4586666524410248     │\n",
+       "│         test_loss             0.8828091025352478     │\n",
+       "│        test_loss_0            0.8828091025352478     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test_accuracy \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.4586666524410248 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.8828091025352478 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss_0 \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.8828091025352478 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Category Embedding Metrics:\n",
+      "[{'test_loss_0': 0.8828091025352478, 'test_loss': 0.8828091025352478, 'test_accuracy': 0.4586666524410248}]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "
2024-12-12 00:09:05,183 - {pytorch_tabular.tabular_model:147} - INFO - Experiment Tracking is turned off           \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m183\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m147\u001b[0m\u001b[1m}\u001b[0m - INFO - Experiment Tracking is turned off \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 42\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:05,263 - {pytorch_tabular.tabular_model:549} - INFO - Preparing the DataLoaders                   \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m263\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m549\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the DataLoaders \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:05,272 - {pytorch_tabular.tabular_datamodule:527} - INFO - Setting up the datamodule for          \n",
+       "classification task                                                                                                \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m272\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_datamodul\u001b[1;92me:527\u001b[0m\u001b[1m}\u001b[0m - INFO - Setting up the datamodule for \n", + "classification task \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:05,294 - {pytorch_tabular.tabular_model:600} - INFO - Preparing the Model: FTTransformerModel     \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m294\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m600\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Model: FTTransformerModel \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:05,323 - {pytorch_tabular.tabular_model:343} - INFO - Preparing the Trainer                       \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m323\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m343\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Trainer \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:05,623 - {pytorch_tabular.tabular_model:679} - INFO - Training Started                            \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:05\u001b[0m,\u001b[1;36m623\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m679\u001b[0m\u001b[1m}\u001b[0m - INFO - Training Started \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n",
+       "┃    Name              Type                   Params  Mode  ┃\n",
+       "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n",
+       "│ 0 │ _backbone        │ FTTransformerBackbone │ 57.7 K │ train │\n",
+       "│ 1 │ _embedding_layer │ Embedding2dLayer      │    864 │ train │\n",
+       "│ 2 │ _head            │ LinearHead            │     66 │ train │\n",
+       "│ 3 │ loss             │ CrossEntropyLoss      │      0 │ train │\n",
+       "└───┴──────────────────┴───────────────────────┴────────┴───────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mName \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mType \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mParams\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mMode \u001b[0m\u001b[1;35m \u001b[0m┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│\u001b[2m \u001b[0m\u001b[2m0\u001b[0m\u001b[2m \u001b[0m│ _backbone │ FTTransformerBackbone │ 57.7 K │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m1\u001b[0m\u001b[2m \u001b[0m│ _embedding_layer │ Embedding2dLayer │ 864 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m2\u001b[0m\u001b[2m \u001b[0m│ _head │ LinearHead │ 66 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m3\u001b[0m\u001b[2m \u001b[0m│ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴───────────────────────┴────────┴───────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Trainable params: 58.6 K                                                                                           \n",
+       "Non-trainable params: 0                                                                                            \n",
+       "Total params: 58.6 K                                                                                               \n",
+       "Total estimated model params size (MB): 0                                                                          \n",
+       "Modules in train mode: 56                                                                                          \n",
+       "Modules in eval mode: 0                                                                                            \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1mTrainable params\u001b[0m: 58.6 K \n", + "\u001b[1mNon-trainable params\u001b[0m: 0 \n", + "\u001b[1mTotal params\u001b[0m: 58.6 K \n", + "\u001b[1mTotal estimated model params size (MB)\u001b[0m: 0 \n", + "\u001b[1mModules in train mode\u001b[0m: 56 \n", + "\u001b[1mModules in eval mode\u001b[0m: 0 \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "62184d0ac93049058c153f2e93518d0f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
2024-12-12 00:09:07,482 - {pytorch_tabular.tabular_model:692} - INFO - Training the model completed                \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m482\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m692\u001b[0m\u001b[1m}\u001b[0m - INFO - Training the model completed \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,488 - {pytorch_tabular.tabular_model:1533} - INFO - Loading the best model                     \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m488\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m1533\u001b[0m\u001b[1m}\u001b[0m - INFO - Loading the best model \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5a482fb9cd5045e3ada1beac9c114d97", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│       test_accuracy           0.5546666383743286     │\n",
+       "│         test_loss             0.6846821904182434     │\n",
+       "│        test_loss_0            0.6846821904182434     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test_accuracy \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.5546666383743286 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.6846821904182434 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss_0 \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.6846821904182434 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "FT Transformer Metrics:\n",
+      "[{'test_loss_0': 0.6846821904182434, 'test_loss': 0.6846821904182434, 'test_accuracy': 0.5546666383743286}]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "
2024-12-12 00:09:07,824 - {pytorch_tabular.tabular_model:147} - INFO - Experiment Tracking is turned off           \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m824\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m147\u001b[0m\u001b[1m}\u001b[0m - INFO - Experiment Tracking is turned off \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 42\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,863 - {pytorch_tabular.tabular_model:549} - INFO - Preparing the DataLoaders                   \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m863\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m549\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the DataLoaders \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,870 - {pytorch_tabular.tabular_datamodule:527} - INFO - Setting up the datamodule for          \n",
+       "classification task                                                                                                \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m870\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_datamodul\u001b[1;92me:527\u001b[0m\u001b[1m}\u001b[0m - INFO - Setting up the datamodule for \n", + "classification task \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,900 - {pytorch_tabular.tabular_model:600} - INFO - Preparing the Model: TabNetModel            \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m900\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m600\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Model: TabNetModel \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:07,965 - {pytorch_tabular.tabular_model:343} - INFO - Preparing the Trainer                       \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:07\u001b[0m,\u001b[1;36m965\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m343\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Trainer \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:08,200 - {pytorch_tabular.tabular_model:679} - INFO - Training Started                            \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:08\u001b[0m,\u001b[1;36m200\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m679\u001b[0m\u001b[1m}\u001b[0m - INFO - Training Started \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n",
+       "┃    Name              Type              Params  Mode  ┃\n",
+       "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n",
+       "│ 0 │ _embedding_layer │ Identity         │      0 │ train │\n",
+       "│ 1 │ _backbone        │ TabNetBackbone   │  6.4 K │ train │\n",
+       "│ 2 │ _head            │ Identity         │      0 │ train │\n",
+       "│ 3 │ loss             │ CrossEntropyLoss │      0 │ train │\n",
+       "└───┴──────────────────┴──────────────────┴────────┴───────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n", + "┃\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mName \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mType \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mParams\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mMode \u001b[0m\u001b[1;35m \u001b[0m┃\n", + "┡━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n", + "│\u001b[2m \u001b[0m\u001b[2m0\u001b[0m\u001b[2m \u001b[0m│ _embedding_layer │ Identity │ 0 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m1\u001b[0m\u001b[2m \u001b[0m│ _backbone │ TabNetBackbone │ 6.4 K │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m2\u001b[0m\u001b[2m \u001b[0m│ _head │ Identity │ 0 │ train │\n", + "│\u001b[2m \u001b[0m\u001b[2m3\u001b[0m\u001b[2m \u001b[0m│ loss │ CrossEntropyLoss │ 0 │ train │\n", + "└───┴──────────────────┴──────────────────┴────────┴───────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Trainable params: 6.4 K                                                                                            \n",
+       "Non-trainable params: 0                                                                                            \n",
+       "Total params: 6.4 K                                                                                                \n",
+       "Total estimated model params size (MB): 0                                                                          \n",
+       "Modules in train mode: 111                                                                                         \n",
+       "Modules in eval mode: 0                                                                                            \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1mTrainable params\u001b[0m: 6.4 K \n", + "\u001b[1mNon-trainable params\u001b[0m: 0 \n", + "\u001b[1mTotal params\u001b[0m: 6.4 K \n", + "\u001b[1mTotal estimated model params size (MB)\u001b[0m: 0 \n", + "\u001b[1mModules in train mode\u001b[0m: 111 \n", + "\u001b[1mModules in eval mode\u001b[0m: 0 \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2e27939fc57d4c9585a3252b035e74f8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
2024-12-12 00:09:09,766 - {pytorch_tabular.tabular_model:692} - INFO - Training the model completed                \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:09\u001b[0m,\u001b[1;36m766\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m692\u001b[0m\u001b[1m}\u001b[0m - INFO - Training the model completed \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:09:09,767 - {pytorch_tabular.tabular_model:1533} - INFO - Loading the best model                     \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:09:09\u001b[0m,\u001b[1;36m767\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m1533\u001b[0m\u001b[1m}\u001b[0m - INFO - Loading the best model \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8a98c3a2c4ce4bcaac279982ec86bd8f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│       test_accuracy           0.4346666634082794     │\n",
+       "│         test_loss             1.1570961475372314     │\n",
+       "│        test_loss_0            1.1570961475372314     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test_accuracy \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.4346666634082794 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.1570961475372314 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_loss_0 \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.1570961475372314 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "TabNet Metrics:\n",
+      "[{'test_loss_0': 1.1570961475372314, 'test_loss': 1.1570961475372314, 'test_accuracy': 0.4346666634082794}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "ce_metrics = train_and_evaluate_model(model_config_1, \"Category Embedding\")[0]\n",
+    "ft_metrics = train_and_evaluate_model(model_config_2, \"FT Transformer\")[0]\n",
+    "tab_metrics = train_and_evaluate_model(model_config_3, \"TabNet\")[0]\n",
+    "ce_acc = ce_metrics[\"test_accuracy\"]\n",
+    "ft_acc = ft_metrics[\"test_accuracy\"]\n",
+    "tab_acc = tab_metrics[\"test_accuracy\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Stacking Model Test Accuracy: 0.5960000157356262\n",
+      "Category Embedding Model Test Accuracy: 0.4586666524410248\n",
+      "FT Transformer Model Test Accuracy: 0.5546666383743286\n",
+      "TabNet Model Test Accuracy: 0.4346666634082794\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Stacking Model Test Accuracy: {}\".format(stacking_acc))\n",
+    "print(\"Category Embedding Model Test Accuracy: {}\".format(ce_acc))\n",
+    "print(\"FT Transformer Model Test Accuracy: {}\".format(ft_acc))\n",
+    "print(\"TabNet Model Test Accuracy: {}\".format(tab_acc))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Save the stacking model & load it"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
2024-12-12 00:00:31,524 - {pytorch_tabular.tabular_model:1579} - WARNING - Directory is not empty. Overwriting the \n",
+       "contents.                                                                                                          \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:00:31\u001b[0m,\u001b[1;36m524\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m1579\u001b[0m\u001b[1m}\u001b[0m - WARNING - Directory is not empty. Overwriting the \n", + "contents. \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "stacking_model.save_model(\"stacking_model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
2024-12-12 00:00:32,437 - {pytorch_tabular.tabular_model:172} - INFO - Experiment Tracking is turned off           \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:00:32\u001b[0m,\u001b[1;36m437\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m172\u001b[0m\u001b[1m}\u001b[0m - INFO - Experiment Tracking is turned off \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
2024-12-12 00:00:32,452 - {pytorch_tabular.tabular_model:343} - INFO - Preparing the Trainer                       \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;36m2024\u001b[0m-\u001b[1;36m12\u001b[0m-\u001b[1;36m12\u001b[0m \u001b[1;92m00:00:32\u001b[0m,\u001b[1;36m452\u001b[0m - \u001b[1m{\u001b[0mpytorch_tabular.tabular_model:\u001b[1;36m343\u001b[0m\u001b[1m}\u001b[0m - INFO - Preparing the Trainer \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Trainer already configured with model summary callbacks: []. Skipping setting a default `ModelSummary` callback.\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], + "source": [ + "loaded_model = TabularModel.load_model(\"stacking_model\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Key Points About Stacking\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "1. The stacking model combines predictions from multiple base models into a final prediction\n", + "2. Each base model can have its own architecture and hyperparameters\n", + "3. The head layer combines the outputs from all base models\n", + "4. Base models are trained simultaneously\n", + "5. The stacking model can often achieve better performance than individual models\n", + "\n", + "## Tips for Better Stacking Results\n", + "\n", + "1. Use diverse base models that capture different aspects of the data\n", + "2. Experiment with different head architectures\n", + "3. Consider using cross-validation for more robust stacking\n", + "4. Balance model complexity with training time\n", + "5. Monitor individual model performances to ensure they contribute meaningfully\n", + "\n", + "This example demonstrates basic stacking functionality. 
For production use cases, you may want to:\n", + "- Use cross-validation\n", + "- Implement more sophisticated ensemble techniques\n", + "- Add custom metrics\n", + "- Tune hyperparameters for both base models and stacking head" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/mkdocs.yml b/mkdocs.yml index 59a38a83..9f3aeca7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -24,6 +24,7 @@ nav: - SHAP, Deep LIFT and so on through Captum Integration: "tutorials/14-Explainability.ipynb" - Custom PyTorch Models: - Implementing New Supervised Architectures: "tutorials/04-Implementing New Architectures.ipynb" + - Model Stacking: "tutorials/16-Model Stacking.ipynb" - Other Features: - Using Neural Categorical Embeddings in Scikit-Learn Workflows: "tutorials/03-Neural Embedding in Scikit-Learn Workflows.ipynb" - Self-Supervised Learning using Denoising Autoencoders: "tutorials/08-Self-Supervised Learning-DAE.ipynb" diff --git a/src/pytorch_tabular/models/__init__.py b/src/pytorch_tabular/models/__init__.py index 0ae80b8e..e4d3353b 100644 --- a/src/pytorch_tabular/models/__init__.py +++ b/src/pytorch_tabular/models/__init__.py @@ -19,6 +19,7 @@ from .gate import GatedAdditiveTreeEnsembleConfig, GatedAdditiveTreeEnsembleModel from .mixture_density import MDNConfig, MDNModel from .node import NodeConfig, NODEModel +from .stacking import StackingModel, StackingModelConfig from .tab_transformer import TabTransformerConfig, TabTransformerModel from .tabnet import TabNetModel, TabNetModelConfig @@ -45,6 +46,8 @@ "GANDALFBackbone", "DANetConfig", "DANetModel", + "StackingModel", + 
"StackingModelConfig", "category_embedding", "node", "mixture_density", @@ -55,4 +58,5 @@ "gate", "gandalf", "danet", + "stacking", ] diff --git a/src/pytorch_tabular/models/stacking/__init__.py b/src/pytorch_tabular/models/stacking/__init__.py new file mode 100644 index 00000000..ca69b8ae --- /dev/null +++ b/src/pytorch_tabular/models/stacking/__init__.py @@ -0,0 +1,4 @@ +from .config import StackingModelConfig +from .stacking_model import StackingBackbone, StackingModel + +__all__ = ["StackingModel", "StackingModelConfig", "StackingBackbone"] diff --git a/src/pytorch_tabular/models/stacking/config.py b/src/pytorch_tabular/models/stacking/config.py new file mode 100644 index 00000000..586cddab --- /dev/null +++ b/src/pytorch_tabular/models/stacking/config.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass, field + +from pytorch_tabular.config import ModelConfig + + +@dataclass +class StackingModelConfig(ModelConfig): + """StackingModelConfig is a configuration class for the StackingModel. It is used to stack multiple models + together. Now, CategoryEmbeddingModel, TabNetModel, FTTransformerModel, GatedAdditiveTreeEnsembleModel, DANetModel, + AutoIntModel, GANDALFModel, NodeModel are supported. + + Args: + model_configs (list[ModelConfig]): List of model configs to stack. 
+ + """ + + model_configs: list = field(default_factory=list, metadata={"help": "List of model configs to stack"}) + _module_src: str = field(default="models.stacking") + _model_name: str = field(default="StackingModel") + _backbone_name: str = field(default="StackingBackbone") + _config_name: str = field(default="StackingConfig") + + +# if __name__ == "__main__": +# from pytorch_tabular.utils import generate_doc_dataclass +# print(generate_doc_dataclass(StackingModelConfig)) diff --git a/src/pytorch_tabular/models/stacking/stacking_model.py b/src/pytorch_tabular/models/stacking/stacking_model.py new file mode 100644 index 00000000..a1090089 --- /dev/null +++ b/src/pytorch_tabular/models/stacking/stacking_model.py @@ -0,0 +1,140 @@ +import inspect + +import torch +import torch.nn as nn +from omegaconf import DictConfig + +import pytorch_tabular.models as models +from pytorch_tabular.models import BaseModel +from pytorch_tabular.models.common.heads import blocks +from pytorch_tabular.models.gate import GatedAdditiveTreesBackbone +from pytorch_tabular.models.node import NODEBackbone + + +def instantiate_backbone(hparams, backbone_name): + backbone_class = getattr(getattr(models, hparams._module_src.split(".")[-1]), backbone_name) + class_args = list(inspect.signature(backbone_class).parameters.keys()) + if "config" in class_args: + return backbone_class(config=hparams) + else: + return backbone_class( + **{ + arg: getattr(hparams, arg) if arg != "block_activation" else getattr(nn, getattr(hparams, arg))() + for arg in class_args + } + ) + + +class StackingEmbeddingLayer(nn.Module): + def __init__(self, embedding_layers: nn.ModuleList): + super().__init__() + self.embedding_layers = embedding_layers + + def forward(self, x): + outputs = [] + for embedding_layer in self.embedding_layers: + em_output = embedding_layer(x) + outputs.append(em_output) + return outputs + + +class StackingBackbone(nn.Module): + def __init__(self, config: DictConfig): + super().__init__() + 
self.hparams = config + self._build_network() + + def _build_network(self): + self._backbones = nn.ModuleList() + self._heads = nn.ModuleList() + self._backbone_output_dims = [] + assert len(self.hparams.model_configs) > 0, "Stacking requires more than 0 model" + for model_i in range(len(self.hparams.model_configs)): + # copy the stacking config's continuous/categorical/embedding settings into each sub-model config + self.hparams.model_configs[model_i].embedded_cat_dim = self.hparams.embedded_cat_dim + self.hparams.model_configs[model_i].continuous_dim = self.hparams.continuous_dim + self.hparams.model_configs[model_i].n_continuous_features = self.hparams.continuous_dim + + self.hparams.model_configs[model_i].embedding_dims = self.hparams.embedding_dims + self.hparams.model_configs[model_i].categorical_cardinality = self.hparams.categorical_cardinality + self.hparams.model_configs[model_i].categorical_dim = self.hparams.categorical_dim + self.hparams.model_configs[model_i].cat_embedding_dims = self.hparams.embedding_dims + + # default a missing output_dim on the sub-model config to 128 + if getattr(self.hparams.model_configs[model_i], "output_dim", None) is None: + self.hparams.model_configs[model_i].output_dim = 128 + + # when the stacking config carries an inferred_config, hand it on to the sub-model config 
+ if getattr(self.hparams, "inferred_config", None) is not None: + self.hparams.model_configs[model_i].inferred_config = self.hparams.inferred_config + + # instantiate the sub-model's backbone from its own config + _backbone = instantiate_backbone( + self.hparams.model_configs[model_i], self.hparams.model_configs[model_i]._backbone_name + ) + # set continuous_dim + _backbone.continuous_dim = self.hparams.continuous_dim + # if output_dim is not set, set it to the output_dim in model_config + if getattr(_backbone, "output_dim", None) is None: + setattr( + _backbone, + "output_dim", + self.hparams.model_configs[model_i].output_dim, + ) + self._backbones.append(_backbone) + self._backbone_output_dims.append(_backbone.output_dim) + + self.output_dim = sum(self._backbone_output_dims)  # StackingModel sizes its head input from this sum + + def _build_embedding_layer(self): + assert getattr(self, "_backbones", None) is not None, "Backbones are not built" + embedding_layers = nn.ModuleList() + for backbone in self._backbones: + if getattr(backbone, "_build_embedding_layer", None) is None: + embedding_layers.append(nn.Identity())  # no embedding builder on this backbone: pass the input through unchanged + else: + embedding_layers.append(backbone._build_embedding_layer()) + return StackingEmbeddingLayer(embedding_layers) + + def forward(self, x_list): + outputs = [] + for i, backbone in enumerate(self._backbones): + bb_output = backbone(x_list[i])  # the i-th embedding output feeds the i-th backbone + if len(bb_output.shape) == 3 and isinstance(backbone, GatedAdditiveTreesBackbone): + bb_output = bb_output.mean(dim=-1)  # 3-D GATE output: average away the last axis before concatenation + elif len(bb_output.shape) == 3 and isinstance(backbone, NODEBackbone): + bb_output = bb_output.mean(dim=1)  # 3-D NODE output: average away axis 1 before concatenation + outputs.append(bb_output) + x = torch.cat(outputs, dim=1)  # join all backbone outputs along dim 1 + return x + + +class StackingModel(BaseModel): + def __init__(self, config: DictConfig, **kwargs): + super().__init__(config, **kwargs) + + def _build_network(self): + self._backbone = StackingBackbone(self.hparams) + self._embedding_layer = self._backbone._build_embedding_layer() + self.output_dim = self._backbone.output_dim + self._head = self._get_head_from_config() + + def _get_head_from_config(self): 
+ _head_callable = getattr(blocks, self.hparams.head) + return _head_callable( + in_units=self.output_dim, + output_dim=self.hparams.output_dim, + config=_head_callable._config_template(**self.hparams.head_config), + ) + + @property + def backbone(self): + return self._backbone + + @property + def embedding_layer(self): + return self._embedding_layer + + @property + def head(self): + return self._head diff --git a/src/pytorch_tabular/models/tabnet/config.py b/src/pytorch_tabular/models/tabnet/config.py index ade0c6a0..c1142273 100644 --- a/src/pytorch_tabular/models/tabnet/config.py +++ b/src/pytorch_tabular/models/tabnet/config.py @@ -129,6 +129,7 @@ class TabNetModelConfig(ModelConfig): _module_src: str = field(default="models.tabnet") _model_name: str = field(default="TabNetModel") _config_name: str = field(default="TabNetModelConfig") + _backbone_name: str = field(default="TabNetBackbone") # if __name__ == "__main__": diff --git a/tests/test_model_stacking.py b/tests/test_model_stacking.py new file mode 100644 index 00000000..629c19a4 --- /dev/null +++ b/tests/test_model_stacking.py @@ -0,0 +1,223 @@ +import numpy as np +import pytest +import torch +from sklearn.preprocessing import PowerTransformer + +from pytorch_tabular import TabularModel +from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig +from pytorch_tabular.models.autoint import AutoIntConfig +from pytorch_tabular.models.category_embedding import CategoryEmbeddingModelConfig +from pytorch_tabular.models.danet import DANetConfig +from pytorch_tabular.models.ft_transformer import FTTransformerConfig +from pytorch_tabular.models.gandalf import GANDALFConfig +from pytorch_tabular.models.gate import GatedAdditiveTreeEnsembleConfig +from pytorch_tabular.models.node import NodeConfig +from pytorch_tabular.models.stacking import StackingModelConfig +from pytorch_tabular.models.tabnet import TabNetModelConfig + + +def fake_metric(y_hat, y): + return (y_hat - y).mean()  # mean signed difference between prediction and target + + +def 
get_model_configs(task): + all_model_configs = [ + lambda task: CategoryEmbeddingModelConfig( + task=task, + ), + lambda task: TabNetModelConfig( + task=task, + ), + lambda task: FTTransformerConfig( + task=task, + ), + lambda task: GatedAdditiveTreeEnsembleConfig( + task=task, + ), + lambda task: DANetConfig( + task=task, + ), + lambda task: AutoIntConfig( + task=task, + ), + lambda task: GANDALFConfig( + task=task, + ), + lambda task: NodeConfig( + task=task, + ), + ] + return [model_config(task) for model_config in all_model_configs] + + +@pytest.mark.parametrize("multi_target", [True, False]) +@pytest.mark.parametrize( + "continuous_cols", + [ + [ + "AveRooms", + "AveBedrms", + "Population", + "AveOccup", + "Latitude", + "Longitude", + ], + [], + ], +) +@pytest.mark.parametrize("categorical_cols", [["HouseAgeBin"], []]) +@pytest.mark.parametrize("continuous_feature_transform", [None, "yeo-johnson"]) +@pytest.mark.parametrize("normalize_continuous_features", [True, False]) +@pytest.mark.parametrize("target_range", [True, False]) +@pytest.mark.parametrize( + "target_transform", + [None, PowerTransformer(), (lambda x: np.power(x, 2), lambda x: np.sqrt(x))], +) +@pytest.mark.parametrize("virtual_bz", [None, 32]) +# @pytest.mark.parametrize("custom_loss", [None, torch.nn.L1Loss()]) +# @pytest.mark.parametrize("custom_optimizer", [None, torch.optim.Adagrad]) +@pytest.mark.parametrize( + "custom_args", [(None, None, None, None), ([fake_metric], [False], torch.nn.L1Loss(), torch.optim.Adagrad)] +) +@pytest.mark.parametrize("custom_head_config", [None, "", "32", "32-32"]) +@pytest.mark.parametrize("model_configs", [get_model_configs("regression")]) +def test_regression( + regression_data, + multi_target, + continuous_cols, + categorical_cols, + continuous_feature_transform, + normalize_continuous_features, + target_range, + target_transform, + virtual_bz, + # custom_metrics, + # custom_loss, + # custom_optimizer, + custom_args, + custom_head_config, + model_configs, +): 
@pytest.mark.parametrize("multi_target", [False, True])
@pytest.mark.parametrize(
    "continuous_cols",
    [
        [f"feature_{i}" for i in range(54)],
        [],
    ],
)
@pytest.mark.parametrize("categorical_cols", [["feature_0_cat"], []])
@pytest.mark.parametrize("continuous_feature_transform", [None])
@pytest.mark.parametrize("normalize_continuous_features", [True])
@pytest.mark.parametrize("model_configs", [get_model_configs("classification")])
def test_classification(
    classification_data,
    multi_target,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
    model_configs,
):
    """Fit, evaluate and predict with a stacking model on the classification fixture.

    The parametrization crosses single/multi target with the presence or
    absence of continuous and categorical columns. The combination that has no
    input columns at all is skipped.
    """
    train, test, target = classification_data
    if not continuous_cols and not categorical_cols:
        # Skip instead of returning, so this combination is not reported as a
        # pass when nothing was exercised.
        pytest.skip("parameter combination has no input features")

    data_config = DataConfig(
        target=target + ["feature_53"] if multi_target else target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
        continuous_feature_transform=continuous_feature_transform,
        normalize_continuous_features=normalize_continuous_features,
    )
    model_config = StackingModelConfig(
        task="classification",
        model_configs=model_configs,
    )
    trainer_config = TrainerConfig(
        max_epochs=3,
        checkpoints=None,
        early_stopping=None,
        accelerator="cpu",
        fast_dev_run=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(train=train)

    result = tabular_model.evaluate(test)
    assert "test_accuracy" in result[0]

    # One prediction row is expected per row of the test frame.
    pred_df = tabular_model.predict(test)
    assert pred_df.shape[0] == test.shape[0]