diff --git a/README.md b/README.md
index 91c993d..6ea4c58 100644
--- a/README.md
+++ b/README.md
@@ -66,4 +66,28 @@ If you're planning to use some closed-source APIs, you also need to set the toke
 export OPENAI_API_KEY=
 export ANTHROPIC_API_KEY=
 export GEMINI_API_KEY=
-```
\ No newline at end of file
+```
+
+Say we want to obtain the preferences of `gpt-4-turbo-2024-04-09`:
+
+```sh
+export OPENAI_API_KEY=
+python scripts/run_generative.py \
+    --dataset_name $DATASET \
+    --split $SPLIT \
+    --model gpt-4-turbo-2024-04-09 \
+    --output_dir $OUTDIR
+```
+
+You can also run open-source LMs in a generative fashion.
+The inference is then routed through [vLLM](https://github.com/vllm-project/vllm).
+Here's an example using `meta-llama/Meta-Llama-3-70B-Instruct`:
+
+```sh
+python scripts/run_generative.py \
+    --dataset_name $DATASET \
+    --split $SPLIT \
+    --model "meta-llama/Meta-Llama-3-70B-Instruct" \
+    --num_gpus 4 \
+    --output_dir $OUTDIR
+```
diff --git a/scripts/run_generative.py b/scripts/run_generative.py
index 561045c..0814118 100644
--- a/scripts/run_generative.py
+++ b/scripts/run_generative.py
@@ -32,6 +32,7 @@
 import os
 import sys
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
 
 import numpy as np
 from datasets import load_dataset
@@ -58,6 +59,7 @@ def get_args():
     parser.add_argument("--dataset_name", type=str, required=True, help="name of dataset to test on")
     parser.add_argument("--split", default="test", type=str, required=True, help="dataset split to evaluate")
     parser.add_argument("--model", type=str, nargs="+", required=True, help="name of model to use")
+    parser.add_argument("--output_dir", type=str, required=True, help="directory to save the results")
     parser.add_argument("--chat_template", type=str, default=None, help="fastchat chat template (optional)")
     parser.add_argument("--trust_remote_code", action="store_true", default=False, help="directly load model instead of pipeline")
     parser.add_argument("--num_gpus", type=int, default=1, help="number of gpus to use, for multi-node vllm")
@@ -353,7 +355,9 @@ def process_shuffled(win, shuffle):
         },
     }
 
-    file_path = f"{model_name.replace('/', '___')}.json"
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)  # make sure the output directory exists
+    file_path = output_dir / f"{model_name.replace('/', '___')}.json"
     with open(file_path, "w") as f:
         json.dump(results_dict, f, indent=4)
 
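For reference, here's a minimal sketch (not part of the patch) of where a finished run leaves its results, mirroring the path logic added to `scripts/run_generative.py` above. The `results_path` helper and the `results` directory are placeholders for illustration, and the diff doesn't show the schema of the saved `results_dict`, so the snippet only lists its top-level keys:

```python
import json
from pathlib import Path


def results_path(output_dir: str, model_name: str) -> Path:
    """Mirror the naming scheme in run_generative.py:
    slashes in the model name become triple underscores."""
    return Path(output_dir) / f"{model_name.replace('/', '___')}.json"


# Placeholder values: $OUTDIR and the model from the second README example.
path = results_path("results", "meta-llama/Meta-Llama-3-70B-Instruct")
print(path)  # results/meta-llama___Meta-Llama-3-70B-Instruct.json

with open(path) as f:
    results = json.load(f)
print(list(results.keys()))  # top-level keys; exact schema depends on the run
```

The `___` substitution keeps one flat file per model while still yielding a valid filename for namespaced Hugging Face IDs like `meta-llama/Meta-Llama-3-70B-Instruct`; and since `--model` is declared with `nargs="+"`, a single invocation can evaluate several models, each of which would then get its own JSON file under `--output_dir`.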