diff --git a/.github/workflows/tests-main.yml b/.github/workflows/tests-main.yml deleted file mode 100644 index d47251fd258..00000000000 --- a/.github/workflows/tests-main.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: tests on transformers PEFT main - -on: - push: - branches: [ main ] - -env: - CI_SLACK_CHANNEL: ${{ secrets.CI_PUSH_MAIN_CHANNEL }} - -jobs: - tests: - strategy: - matrix: - python-version: ['3.9', '3.10', '3.11'] - os: ['ubuntu-latest', 'windows-latest'] - fail-fast: false - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: "pip" - cache-dependency-path: | - setup.py - requirements.txt - - name: Install dependencies - run: | - python -m pip install --upgrade pip - # install PEFT & transformers from source - pip install -U git+https://github.com/huggingface/peft.git - pip install -U git+https://github.com/huggingface/transformers.git - # cpu version of pytorch - pip install ".[test, diffusers]" - - name: Test with pytest - run: | - make test - - name: Post to Slack - if: always() - uses: huggingface/hf-workflows/.github/actions/post-slack@main - with: - slack_channel: ${{ env.CI_SLACK_CHANNEL }} - title: 🤗 Results of the TRL CI on transformers/PEFT main - status: ${{ job.status }} - slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 46140b0450a..6ba5bcab427 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: tests +name: Tests on: push: @@ -15,76 +15,101 @@ on: env: TQDM_DISABLE: 1 + CI_SLACK_CHANNEL: ${{ secrets.CI_PUSH_MAIN_CHANNEL }} jobs: - check_code_quality: - runs-on: ubuntu-latest + tests: + name: Tests strategy: matrix: - python-version: [3.9] - + python-version: ['3.9', '3.10', '3.11', '3.12'] + os: ['ubuntu-latest', 'windows-latest'] + fail-fast: false + 
runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: recursive - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - uses: pre-commit/action@v3.0.1 + cache: "pip" + cache-dependency-path: | + setup.py + requirements.txt + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install ".[dev]" + - name: Test with pytest + run: | + make test + - name: Post to Slack + if: github.ref == 'refs/heads/main' && always() # Check if the branch is main + uses: huggingface/hf-workflows/.github/actions/post-slack@main with: - extra_args: --all-files + slack_channel: ${{ env.CI_SLACK_CHANNEL }} + title: 🤗 Results of the TRL CI + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} - tests: - needs: check_code_quality - strategy: - matrix: - python-version: ['3.9', '3.10', '3.11'] - os: ['ubuntu-latest', 'windows-latest'] - runs-on: ${{ matrix.os }} + tests_dev: + name: Tests with dev dependencies + runs-on: 'ubuntu-latest' steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: "pip" - cache-dependency-path: | - setup.py - requirements.txt - - name: Install dependencies - run: | - python -m pip install --upgrade pip - # install PEFT & transformers from source - pip install -U git+https://github.com/huggingface/peft.git - pip install -U git+https://github.com/huggingface/transformers.git - # cpu version of pytorch - pip install ".[test, diffusers]" - - name: Test with pytest - run: | - make test + - uses: actions/checkout@v4 + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: "pip" + cache-dependency-path: | + setup.py + requirements.txt + - name: Install dependencies + run: | + python 
-m pip install --upgrade pip + python -m pip install -U git+https://github.com/huggingface/accelerate.git + python -m pip install -U git+https://github.com/huggingface/datasets.git + python -m pip install -U git+https://github.com/huggingface/transformers.git + python -m pip install ".[dev]" + - name: Test with pytest + run: | + make test + - name: Post to Slack + if: github.ref == 'refs/heads/main' && always() # Check if the branch is main + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ env.CI_SLACK_CHANNEL }} + title: 🤗 Results of the TRL CI with dev dependencies + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} - tests_no_optional_dep: - needs: check_code_quality + tests_wo_optional_deps: + name: Tests without optional dependencies runs-on: 'ubuntu-latest' steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.9 - uses: actions/setup-python@v5 - with: - python-version: '3.9' - cache: "pip" - cache-dependency-path: | - setup.py - requirements.txt - - name: Install dependencies - run: | - python -m pip install --upgrade pip - # install transformers from source - pip install -U git+https://github.com/huggingface/transformers.git - # cpu version of pytorch - pip install .[test] - - name: Test with pytest - run: | - make test + - uses: actions/checkout@v4 + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: "pip" + cache-dependency-path: | + setup.py + requirements.txt + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install ".[test]" + - name: Test with pytest + run: | + make test + - name: Post to Slack + if: github.ref == 'refs/heads/main' && always() # Check if the branch is main + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ env.CI_SLACK_CHANNEL }} + title: 🤗 Results of the TRL CI without optional dependencies + status: ${{ job.status }} + 
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 49758a53ae3..c84bdbe3a3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,4 @@ -datasets>=1.17.0 -torch>=1.4.0 -tqdm -transformers>=4.46.0 accelerate -peft>=0.3.0 -tyro>=0.5.7 \ No newline at end of file +datasets +rich +transformers>=4.46.0 \ No newline at end of file diff --git a/setup.py b/setup.py index cf2a0fd46c2..ee58373ed13 100644 --- a/setup.py +++ b/setup.py @@ -76,33 +76,23 @@ __version__ = "0.12.0.dev0" # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) REQUIRED_PKGS = [ - "torch>=1.4.0", - "transformers>=4.46.0", - "numpy>=1.18.2;platform_system!='Windows'", - "numpy<2;platform_system=='Windows'", "accelerate", "datasets", - "tyro>=0.5.11", + "rich", # rich shouldn't be a required package for trl, we should remove it from here + "transformers>=4.46.0", ] EXTRAS = { - "test": [ - "parameterized", - "peft>=0.8.0", - "pytest", - "pytest-xdist", - "pytest-cov", - "pytest-xdist", - "scikit-learn", - "Pillow", - "pytest-rerunfailures", - "llm-blender>=0.0.2", - ], - "peft": ["peft>=0.8.0"], - "liger": ["liger-kernel>=0.2.1"], + # Windows support is partially supported with DeepSpeed https://github.com/microsoft/DeepSpeed/tree/master#windows + "deepspeed": ["deepspeed>=0.14.4; sys_platform != 'win32'"], "diffusers": ["diffusers>=0.18.0"], - "deepspeed": ["deepspeed>=0.14.4"], - "quantization": ["bitsandbytes<=0.41.1"], + # liger-kernel depends on triton, which is only available on Linux https://github.com/triton-lang/triton#compatibility + "liger": ["liger-kernel>=0.2.1; sys_platform != 'win32'"], "llm_judge": ["openai>=1.23.2", "llm-blender>=0.0.2"], + "peft": ["peft>=0.8.0"], + "quantization": ["bitsandbytes"], + "scikit": ["scikit-learn"], + "test": ["parameterized", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "pytest"], + "vlm": ["Pillow"], } EXTRAS["dev"] 
= [] for reqs in EXTRAS.values(): @@ -126,6 +116,7 @@ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], url="https://github.com/huggingface/trl", entry_points={ @@ -136,7 +127,7 @@ packages=find_packages(exclude={"tests"}), install_requires=REQUIRED_PKGS, extras_require=EXTRAS, - python_requires=">=3.7", + python_requires=">=3.9", long_description=open("README.md", encoding="utf-8").read(), long_description_content_type="text/markdown", zip_safe=False, diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index a090f60aeda..156e422a855 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -30,7 +30,11 @@ AutoTokenizer, PreTrainedTokenizerBase, ) -from transformers.testing_utils import require_bitsandbytes, require_peft +from transformers.testing_utils import ( + require_bitsandbytes, + require_peft, + require_torch_gpu_if_bnb_not_multi_backend_enabled, +) from trl import DPOConfig, DPOTrainer, FDivergenceType @@ -601,7 +605,7 @@ def test_dpo_lora_save(self): self.fail("Loading the saved peft adapter failed") @require_peft - @require_bitsandbytes + @require_torch_gpu_if_bnb_not_multi_backend_enabled def test_dpo_lora_bf16_autocast_llama(self): # Note this test only works on compute capability > 7 GPU devices from peft import LoraConfig diff --git a/tests/test_peft_models.py b/tests/test_peft_models.py index d4f87b5730a..04539dbcf46 100644 --- a/tests/test_peft_models.py +++ b/tests/test_peft_models.py @@ -17,7 +17,10 @@ import torch from transformers import AutoModelForCausalLM -from transformers.testing_utils import require_bitsandbytes, require_peft +from transformers.testing_utils import ( + require_peft, + require_torch_gpu_if_bnb_not_multi_backend_enabled, +) from transformers.utils import is_peft_available from trl import AutoModelForCausalLMWithValueHead @@ -95,7 +98,7 @@ def 
test_create_peft_model_from_config(self): nb_trainable_params = sum(p.numel() for p in trl_model.parameters() if p.requires_grad) assert nb_trainable_params == 10273 - @require_bitsandbytes + @require_torch_gpu_if_bnb_not_multi_backend_enabled def test_create_bnb_peft_model_from_config(self): r""" Simply creates a peft model and checks that it can be loaded. diff --git a/trl/trainer/judges.py b/trl/trainer/judges.py index 96ab36d88d1..04ef4ec964d 100644 --- a/trl/trainer/judges.py +++ b/trl/trainer/judges.py @@ -21,7 +21,6 @@ import numpy as np from accelerate import Accelerator from huggingface_hub import InferenceClient -from scipy.special import softmax from transformers.utils import is_openai_available from ..import_utils import is_llmblender_available @@ -239,7 +238,13 @@ def judge( ranks[flip_mask] = ranks[flip_mask][:, ::-1] # Return the ranks or score probability - return softmax(ranks, axis=-1)[:, 0].tolist() if return_scores else ranks[:, 0].tolist() + if return_scores: + logit_max = np.amax(ranks, axis=-1, keepdims=True) + exp_logit_shifted = np.exp(ranks - logit_max) + probs = exp_logit_shifted / np.sum(exp_logit_shifted, axis=-1, keepdims=True) + return probs[:, 0].tolist() + else: + return ranks[:, 0].tolist() class HfPairwiseJudge(BasePairwiseJudge):