diff --git a/README.md b/README.md
index 91c993d..6ea4c58 100644
--- a/README.md
+++ b/README.md
@@ -66,4 +66,28 @@ If you're planning to use some closed-source APIs, you also need to set the toke
 export OPENAI_API_KEY=
 export ANTHROPIC_API_KEY=
 export GEMINI_API_KEY=
-```
\ No newline at end of file
+```
+
+Say we want to obtain the preferences of `gpt-4-turbo-2024-04-09`:
+
+```sh
+export OPENAI_API_KEY=
+python scripts/run_generative.py \
+    --dataset_name $DATASET \
+    --split $SPLIT \
+    --model gpt-4-turbo-2024-04-09 \
+    --output_dir $OUTDIR
+```
+
+You can also run open-source LMs in a generative fashion.
+The inference is then routed through [vLLM](https://github.com/vllm-project/vllm).
+Here's an example using `meta-llama/Meta-Llama-3-70B-Instruct`:
+
+```sh
+python scripts/run_generative.py \
+    --dataset_name $DATASET \
+    --split $SPLIT \
+    --model "meta-llama/Meta-Llama-3-70B-Instruct" \
+    --num_gpus 4 \
+    --output_dir $OUTDIR
+```
diff --git a/scripts/run_generative.py b/scripts/run_generative.py
index 561045c..0814118 100644
--- a/scripts/run_generative.py
+++ b/scripts/run_generative.py
@@ -32,6 +32,7 @@
 import os
 import sys
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
 
 import numpy as np
 from datasets import load_dataset
@@ -58,6 +59,7 @@ def get_args():
     parser.add_argument("--dataset_name", type=str, required=True, help="name of dataset to test on")
     parser.add_argument("--split", default="test", type=str, required=True, help="dataset split to evaluate")
     parser.add_argument("--model", type=str, nargs="+", required=True, help="name of model to use")
+    parser.add_argument("--output_dir", type=str, required=True, help="directory to save the results")
     parser.add_argument("--chat_template", type=str, default=None, help="fastchat chat template (optional)")
     parser.add_argument("--trust_remote_code", action="store_true", default=False, help="directly load model instead of pipeline")
     parser.add_argument("--num_gpus", type=int, default=1, help="number of gpus to use, for multi-node vllm")
@@ -353,7 +355,9 @@ def process_shuffled(win, shuffle):
         },
     }
 
-    file_path = f"{model_name.replace('/', '___')}.json"
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)  # make sure the output directory exists
+    file_path = output_dir / f"{model_name.replace('/', '___')}.json"
     with open(file_path, "w") as f:
         json.dump(results_dict, f, indent=4)
 
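For reference, here's a minimal sketch (not part of the patch) of where a finished run leaves its results, mirroring the path logic added to `scripts/run_generative.py` above. The `results_path` helper and the `results` directory are placeholders for illustration, and the diff doesn't show the schema of the saved `results_dict`, so the snippet only lists its top-level keys:

```python
import json
from pathlib import Path


def results_path(output_dir: str, model_name: str) -> Path:
    """Mirror the naming scheme in run_generative.py:
    slashes in the model name become triple underscores."""
    return Path(output_dir) / f"{model_name.replace('/', '___')}.json"


# Placeholder values: $OUTDIR and the model from the second README example.
path = results_path("results", "meta-llama/Meta-Llama-3-70B-Instruct")
print(path)  # results/meta-llama___Meta-Llama-3-70B-Instruct.json

with open(path) as f:
    results = json.load(f)
print(list(results.keys()))  # top-level keys; exact schema depends on the run
```

The `___` substitution keeps one flat file per model while still yielding a valid filename for namespaced Hugging Face IDs like `meta-llama/Meta-Llama-3-70B-Instruct`; and since `--model` is declared with `nargs="+"`, a single invocation can evaluate several models, each of which would then get its own JSON file under `--output_dir`.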