diff --git a/docs/source/clis.md b/docs/source/clis.md
index 666584decf4..54c8c1055f9 100644
--- a/docs/source/clis.md
+++ b/docs/source/clis.md
@@ -26,7 +26,7 @@ Currently supported commands are:
 
 You can launch training directly from the CLI by specifying required arguments like the model and dataset:
 
-<hfoptions id="command_line">
+<hfoptions id="trainer">
 <hfoption id="SFT">
 
 ```bash
@@ -53,6 +53,35 @@ trl reward \
   --dataset_name trl-lib/ultrafeedback_binarized
 ```
 
+</hfoption>
+<hfoption id="GRPO">
+
+```bash
+trl grpo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name HuggingFaceH4/Polaris-Dataset-53K \
+  --reward_funcs accuracy_reward
+```
+
+</hfoption>
+<hfoption id="RLOO">
+
+```bash
+trl rloo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name HuggingFaceH4/Polaris-Dataset-53K \
+  --reward_funcs accuracy_reward
+```
+
+</hfoption>
+<hfoption id="KTO">
+
+```bash
+trl kto \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name trl-lib/kto-mix-14k
+```
+
 </hfoption>
 </hfoptions>
 
@@ -60,7 +89,7 @@ trl reward \
 
 To keep your CLI commands clean and reproducible, you can define all training arguments in a YAML configuration file:
 
-<hfoptions id="config_file">
+<hfoptions id="trainer">
 <hfoption id="SFT">
 
 ```yaml
@@ -105,6 +134,55 @@ Launch with:
 trl reward --config reward_config.yaml
 ```
 
+</hfoption>
+<hfoption id="GRPO">
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: HuggingFaceH4/Polaris-Dataset-53K
+reward_funcs:
+  - accuracy_reward
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+</hfoption>
+<hfoption id="RLOO">
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: HuggingFaceH4/Polaris-Dataset-53K
+reward_funcs:
+  - accuracy_reward
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+</hfoption>
+<hfoption id="KTO">
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/kto-mix-14k
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+
 </hfoption>
 </hfoptions>
 
@@ -114,8 +192,8 @@ TRL CLI natively supports [🤗 Accelerate](https://huggingface.co/docs/accelera
 
 You can pass any `accelerate launch` arguments directly to `trl`, such as `--num_processes`. For more information see [Using accelerate launch](https://huggingface.co/docs/accelerate/en/basic_tutorials/launch#using-accelerate-launch).
 
-<hfoptions id="launch_args">
-<hfoption id="SFT inline">
+<hfoptions id="trainer">
+<hfoption id="SFT">
 
 ```bash
 trl sft \
@@ -124,8 +202,7 @@ trl sft \
   --num_processes 4
 ```
 
-</hfoption>
-<hfoption id="SFT w/ config file">
+Or, with a config file:
 
 ```yaml
 # sft_config.yaml
@@ -141,7 +218,7 @@ trl sft --config sft_config.yaml
 ```
 
 </hfoption>
-<hfoption id="DPO inline">
+<hfoption id="DPO">
 
 ```bash
 trl dpo \
@@ -150,8 +227,7 @@ trl dpo \
   --num_processes 4
 ```
 
-</hfoption>
-<hfoption id="DPO w/ config file">
+Or, with a config file:
 
 ```yaml
 # dpo_config.yaml
@@ -167,7 +243,7 @@ trl dpo --config dpo_config.yaml
 ```
 
 </hfoption>
-<hfoption id="Reward inline">
+<hfoption id="Reward">
 
 ```bash
 trl reward \
@@ -176,8 +252,7 @@ trl reward \
   --num_processes 4
 ```
 
-</hfoption>
-<hfoption id="Reward w/ config file">
+Or, with a config file:
 
 ```yaml
 # reward_config.yaml
@@ -192,6 +267,87 @@ Launch with:
 trl reward --config reward_config.yaml
 ```
 
+</hfoption>
+<hfoption id="GRPO">
+
+```bash
+trl grpo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name HuggingFaceH4/Polaris-Dataset-53K \
+  --reward_funcs accuracy_reward \
+  --num_processes 4
+```
+
+Or, with a config file:
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: HuggingFaceH4/Polaris-Dataset-53K
+reward_funcs:
+  - accuracy_reward
+num_processes: 4
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+</hfoption>
+<hfoption id="RLOO">
+
+```bash
+trl rloo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name HuggingFaceH4/Polaris-Dataset-53K \
+  --reward_funcs accuracy_reward \
+  --num_processes 4
+```
+
+Or, with a config file:
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: HuggingFaceH4/Polaris-Dataset-53K
+reward_funcs:
+  - accuracy_reward
+num_processes: 4
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+</hfoption>
+<hfoption id="KTO">
+
+```bash
+trl kto \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name trl-lib/kto-mix-14k \
+  --num_processes 4
+```
+
+Or, with a config file:
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/kto-mix-14k
+num_processes: 4
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+
 </hfoption>
 </hfoptions>
 
@@ -220,8 +376,8 @@ To use one of these, just pass the name to `--accelerate_config`. TRL will autom
 
 #### Example Usage
 
-<hfoptions id="accelerate_config">
-<hfoption id="SFT inline">
+<hfoptions id="trainer">
+<hfoption id="SFT">
 
 ```bash
 trl sft \
@@ -230,8 +386,7 @@ trl sft \
   --accelerate_config zero2  # or path/to/my/accelerate/config.yaml
 ```
 
-</hfoption>
-<hfoption id="SFT w/ config file">
+Or, with a config file:
 
 ```yaml
 # sft_config.yaml
@@ -247,7 +402,7 @@ trl sft --config sft_config.yaml
 ```
 
 </hfoption>
-<hfoption id="DPO inline">
+<hfoption id="DPO">
 
 ```bash
 trl dpo \
@@ -256,8 +411,7 @@ trl dpo \
   --accelerate_config zero2  # or path/to/my/accelerate/config.yaml
 ```
 
-</hfoption>
-<hfoption id="DPO w/ config file">
+Or, with a config file:
 
 ```yaml
 # dpo_config.yaml
@@ -273,7 +427,7 @@ trl dpo --config dpo_config.yaml
 ```
 
 </hfoption>
-<hfoption id="Reward inline">
+<hfoption id="Reward">
 
 ```bash
 trl reward \
@@ -282,8 +436,7 @@ trl reward \
   --accelerate_config zero2  # or path/to/my/accelerate/config.yaml
 ```
 
-</hfoption>
-<hfoption id="Reward w/ config file">
+Or, with a config file:
 
 ```yaml
 # reward_config.yaml
@@ -298,6 +451,87 @@ Launch with:
 trl reward --config reward_config.yaml
 ```
 
+</hfoption>
+<hfoption id="GRPO">
+
+```bash
+trl grpo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name HuggingFaceH4/Polaris-Dataset-53K \
+  --reward_funcs accuracy_reward \
+  --accelerate_config zero2  # or path/to/my/accelerate/config.yaml
+```
+
+Or, with a config file:
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: HuggingFaceH4/Polaris-Dataset-53K
+reward_funcs:
+  - accuracy_reward
+accelerate_config: zero2  # or path/to/my/accelerate/config.yaml
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+</hfoption>
+<hfoption id="RLOO">
+
+```bash
+trl rloo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name HuggingFaceH4/Polaris-Dataset-53K \
+  --reward_funcs accuracy_reward \
+  --accelerate_config zero2  # or path/to/my/accelerate/config.yaml
+```
+
+Or, with a config file:
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: HuggingFaceH4/Polaris-Dataset-53K
+reward_funcs:
+  - accuracy_reward
+accelerate_config: zero2  # or path/to/my/accelerate/config.yaml
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+</hfoption>
+<hfoption id="KTO">
+
+```bash
+trl kto \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name trl-lib/kto-mix-14k \
+  --accelerate_config zero2  # or path/to/my/accelerate/config.yaml
+```
+
+Or, with a config file:
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/kto-mix-14k
+accelerate_config: zero2  # or path/to/my/accelerate/config.yaml
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+
 </hfoption>
 </hfoptions>
 
@@ -305,7 +539,7 @@ trl reward --config reward_config.yaml
 
 You can use dataset mixtures to combine multiple datasets into a single training dataset. This is useful for training on diverse data sources or when you want to mix different types of data.
 
-<hfoptions id="dataset_mixtures">
+<hfoptions id="trainer">
 <hfoption id="SFT">
 
 ```yaml
@@ -356,6 +590,61 @@ Launch with:
 trl reward --config reward_config.yaml
 ```
 
+</hfoption>
+<hfoption id="GRPO">
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+datasets:
+  - path: HuggingFaceH4/Polaris-Dataset-53K
+  - path: trl-lib/DeepMath-103K
+reward_funcs:
+  - accuracy_reward
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+</hfoption>
+<hfoption id="RLOO">
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+datasets:
+  - path: HuggingFaceH4/Polaris-Dataset-53K
+  - path: trl-lib/DeepMath-103K
+reward_funcs:
+  - accuracy_reward
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+</hfoption>
+<hfoption id="KTO">
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+datasets:
+  - path: trl-lib/kto-mix-14k
+  - path: argilla/ultrafeedback-binarized-preferences-cleaned
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+
 </hfoption>
 </hfoptions>