From ba0be9dbbca36ddcad60d9a3bd30b6a3c2e6272c Mon Sep 17 00:00:00 2001 From: ekagra <3116519+ekagra-ranjan@users.noreply.github.com> Date: Wed, 21 May 2025 22:50:53 +0000 Subject: [PATCH 01/12] add custom dataset --- benchmarks/benchmark_dataset.py | 83 +++++++++++++++++++++++++++++++++ benchmarks/benchmark_serving.py | 29 +++++++++++- 2 files changed, 110 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py index d8f48644cc00..4dd006f8b303 100644 --- a/benchmarks/benchmark_dataset.py +++ b/benchmarks/benchmark_dataset.py @@ -441,6 +441,89 @@ def sample( return samples +# ----------------------------------------------------------------------------- +# Custom Dataset Implementation +# ----------------------------------------------------------------------------- + + +class CustomDataset(BenchmarkDataset): + """ + Implements the Custom dataset. Loads data from a JSONL file and generates + sample requests based on conversation turns. E.g., + ``` + {"prompt": "What is the capital of India?"} + {"prompt": "What is the capital of Iran?"} + {"prompt": "What is the capital of China?"} + ``` + """ + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.load_data() + + def load_data(self) -> None: + if self.dataset_path is None: + raise ValueError("dataset_path must be provided for loading data.") + + # self.data will be a list of dictionaries + # e.g., [{"prompt": "What is the capital of India?"}, ...] + # This will be the standardized format which load_data() + # has to convert into depending on the filetype of dataset_path. 
+ # sample() will assume this standardized format of self.data + self.data = [] + + # Load the JSONL file + if self.dataset_path.endswith(".jsonl"): + jsonl_data = pd.read_json(path_or_buf=self.dataset_path, + lines=True) + for _, row in jsonl_data.iterrows(): + self.data.append(row.to_dict()) + else: + raise NotImplementedError("Only JSONL format is " \ + "supported for CustomDataset.") + + random.seed(self.random_seed) + random.shuffle(self.data) + + def sample( + self, + tokenizer: PreTrainedTokenizerBase, + num_requests: int, + lora_path: Optional[str] = None, + max_loras: Optional[int] = None, + output_len: Optional[int] = None, + enable_multimodal_chat: bool = False, + skip_chat_template: bool = False, + **kwargs, + ) -> list: + sampled_requests = [] + for item in self.data: + if len(sampled_requests) >= num_requests: + break + prompt = item['prompt'] + + # apply template + if not skip_chat_template: + prompt = tokenizer.apply_chat_template( + [{ + "role": "user", + "content": prompt + }], + add_generation_prompt=True, + tokenize=False) + + prompt_len = len(tokenizer(prompt).input_ids) + sampled_requests.append( + SampleRequest( + prompt=prompt, + prompt_len=prompt_len, + expected_output_len=output_len, + )) + self.maybe_oversample_requests(sampled_requests, num_requests) + + return sampled_requests + + # ----------------------------------------------------------------------------- # Sonnet Dataset Implementation # ----------------------------------------------------------------------------- diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index a887e7150dc7..f5acc5387d17 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -60,6 +60,7 @@ ASRDataset, BurstGPTDataset, ConversationDataset, + CustomDataset, HuggingFaceDataset, InstructCoderDataset, MTBenchDataset, @@ -627,7 +628,16 @@ def main(args: argparse.Namespace): "'--dataset-path' if required." 
) - if args.dataset_name == "sonnet": + if args.dataset_name == "custom": + dataset = CustomDataset(dataset_path=args.dataset_path) + input_requests = dataset.sample( + num_requests=args.num_prompts, + tokenizer=tokenizer, + output_len=args.custom_output_len, + skip_chat_template=args.custom_skip_chat_template, + ) + + elif args.dataset_name == "sonnet": dataset = SonnetDataset(dataset_path=args.dataset_path) # For the "sonnet" dataset, formatting depends on the backend. if args.backend == "openai-chat": @@ -886,7 +896,7 @@ def main(args: argparse.Namespace): "--dataset-name", type=str, default="sharegpt", - choices=["sharegpt", "burstgpt", "sonnet", "random", "hf"], + choices=["sharegpt", "burstgpt", "sonnet", "random", "hf", "custom"], help="Name of the dataset to benchmark on.", ) parser.add_argument( @@ -1056,6 +1066,21 @@ def main(args: argparse.Namespace): ) # group for dataset specific arguments + custom_group = parser.add_argument_group("custom dataset options") + custom_group.add_argument( + "--custom-output-len", + type=int, + default=256, + help= + "Number of output tokens per request, used only for custom dataset.", + ) + custom_group.add_argument( + "--custom-skip-chat-template", + action="store_true", + help= + "Skip applying chat template to prompt, used only for custom dataset.", + ) + sonnet_group = parser.add_argument_group("sonnet dataset options") sonnet_group.add_argument( "--sonnet-input-len", From b429fa0499bbaace60cbe9d56cf4410cbece1cde Mon Sep 17 00:00:00 2001 From: ekagra <3116519+ekagra-ranjan@users.noreply.github.com> Date: Wed, 21 May 2025 22:52:31 +0000 Subject: [PATCH 02/12] lint --- benchmarks/benchmark_dataset.py | 21 ++++++++++----------- benchmarks/benchmark_serving.py | 8 +++----- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py index 4dd006f8b303..387ac0bf55ca 100644 --- a/benchmarks/benchmark_dataset.py +++ b/benchmarks/benchmark_dataset.py @@ 
-474,13 +474,13 @@ def load_data(self) -> None: # Load the JSONL file if self.dataset_path.endswith(".jsonl"): - jsonl_data = pd.read_json(path_or_buf=self.dataset_path, - lines=True) + jsonl_data = pd.read_json(path_or_buf=self.dataset_path, lines=True) for _, row in jsonl_data.iterrows(): self.data.append(row.to_dict()) else: - raise NotImplementedError("Only JSONL format is " \ - "supported for CustomDataset.") + raise NotImplementedError( + "Only JSONL format is supported for CustomDataset." + ) random.seed(self.random_seed) random.shuffle(self.data) @@ -500,17 +500,15 @@ def sample( for item in self.data: if len(sampled_requests) >= num_requests: break - prompt = item['prompt'] + prompt = item["prompt"] # apply template if not skip_chat_template: prompt = tokenizer.apply_chat_template( - [{ - "role": "user", - "content": prompt - }], + [{"role": "user", "content": prompt}], add_generation_prompt=True, - tokenize=False) + tokenize=False, + ) prompt_len = len(tokenizer(prompt).input_ids) sampled_requests.append( @@ -518,7 +516,8 @@ def sample( prompt=prompt, prompt_len=prompt_len, expected_output_len=output_len, - )) + ) + ) self.maybe_oversample_requests(sampled_requests, num_requests) return sampled_requests diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index f5acc5387d17..46b2efd4504a 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -1071,16 +1071,14 @@ def main(args: argparse.Namespace): "--custom-output-len", type=int, default=256, - help= - "Number of output tokens per request, used only for custom dataset.", + help="Number of output tokens per request, used only for custom dataset.", ) custom_group.add_argument( "--custom-skip-chat-template", action="store_true", - help= - "Skip applying chat template to prompt, used only for custom dataset.", + help="Skip applying chat template to prompt, used only for custom dataset.", ) - + sonnet_group = parser.add_argument_group("sonnet dataset 
options") sonnet_group.add_argument( "--sonnet-input-len", From 2ae6cbdbaf28a7220ec65d5f246ded8c36d64e74 Mon Sep 17 00:00:00 2001 From: ekagra <3116519+ekagra-ranjan@users.noreply.github.com> Date: Wed, 21 May 2025 22:55:13 +0000 Subject: [PATCH 03/12] remove todo --- benchmarks/benchmark_dataset.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py index 387ac0bf55ca..81d0dbc182cd 100644 --- a/benchmarks/benchmark_dataset.py +++ b/benchmarks/benchmark_dataset.py @@ -9,9 +9,6 @@ - BurstGPT - HuggingFace - VisionArena - -TODO: Implement CustomDataset to parse a JSON file and convert its contents into -SampleRequest instances, similar to the approach used in ShareGPT. """ import base64 From 3e9e1e66da170f6727378e09e24977c184e10107 Mon Sep 17 00:00:00 2001 From: ekagra <3116519+ekagra-ranjan@users.noreply.github.com> Date: Tue, 27 May 2025 21:19:20 +0000 Subject: [PATCH 04/12] add to datasets.py --- vllm/benchmarks/datasets.py | 85 +++++++++++++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 3 deletions(-) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index 74a9b2b03391..f53a4fb47127 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -9,9 +9,6 @@ - BurstGPT - HuggingFace - VisionArena - -TODO: Implement CustomDataset to parse a JSON file and convert its contents into -SampleRequest instances, similar to the approach used in ShareGPT. """ import base64 import io @@ -427,6 +424,88 @@ def sample( )) self.maybe_oversample_requests(samples, num_requests) return samples + + +# ----------------------------------------------------------------------------- +# Custom Dataset Implementation +# ----------------------------------------------------------------------------- + + +class CustomDataset(BenchmarkDataset): + """ + Implements the Custom dataset. Loads data from a JSONL file and generates + sample requests based on conversation turns. 
E.g., + ``` + {"prompt": "What is the capital of India?"} + {"prompt": "What is the capital of Iran?"} + {"prompt": "What is the capital of China?"} + ``` + """ + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.load_data() + + def load_data(self) -> None: + if self.dataset_path is None: + raise ValueError("dataset_path must be provided for loading data.") + + # self.data will be a list of dictionaries + # e.g., [{"prompt": "What is the capital of India?"}, ...] + # This will be the standardized format which load_data() + # has to convert into depending on the filetype of dataset_path. + # sample() will assume this standardized format of self.data + self.data = [] + + # Load the JSONL file + if self.dataset_path.endswith(".jsonl"): + jsonl_data = pd.read_json(path_or_buf=self.dataset_path, lines=True) + for _, row in jsonl_data.iterrows(): + self.data.append(row.to_dict()) + else: + raise NotImplementedError( + "Only JSONL format is supported for CustomDataset." 
+ ) + + random.seed(self.random_seed) + random.shuffle(self.data) + + def sample( + self, + tokenizer: PreTrainedTokenizerBase, + num_requests: int, + lora_path: Optional[str] = None, + max_loras: Optional[int] = None, + output_len: Optional[int] = None, + enable_multimodal_chat: bool = False, + skip_chat_template: bool = False, + **kwargs, + ) -> list: + sampled_requests = [] + for item in self.data: + if len(sampled_requests) >= num_requests: + break + prompt = item["prompt"] + + # apply template + if not skip_chat_template: + prompt = tokenizer.apply_chat_template( + [{"role": "user", "content": prompt}], + add_generation_prompt=True, + tokenize=False, + ) + + prompt_len = len(tokenizer(prompt).input_ids) + sampled_requests.append( + SampleRequest( + prompt=prompt, + prompt_len=prompt_len, + expected_output_len=output_len, + ) + ) + self.maybe_oversample_requests(sampled_requests, num_requests) + + return sampled_requests # ----------------------------------------------------------------------------- From 9e4a6211ef8857df3f75f7eefbb44aa023a9da2a Mon Sep 17 00:00:00 2001 From: ekagra <3116519+ekagra-ranjan@users.noreply.github.com> Date: Tue, 27 May 2025 21:30:49 +0000 Subject: [PATCH 05/12] lint --- vllm/benchmarks/datasets.py | 16 +++++++++------- vllm/benchmarks/serve.py | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index f53a4fb47127..d955aae20031 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -424,7 +424,7 @@ def sample( )) self.maybe_oversample_requests(samples, num_requests) return samples - + # ----------------------------------------------------------------------------- # Custom Dataset Implementation @@ -459,13 +459,13 @@ def load_data(self) -> None: # Load the JSONL file if self.dataset_path.endswith(".jsonl"): - jsonl_data = pd.read_json(path_or_buf=self.dataset_path, lines=True) + jsonl_data = pd.read_json(path_or_buf=self.dataset_path, + 
lines=True) for _, row in jsonl_data.iterrows(): self.data.append(row.to_dict()) else: raise NotImplementedError( - "Only JSONL format is supported for CustomDataset." - ) + "Only JSONL format is supported for CustomDataset.") random.seed(self.random_seed) random.shuffle(self.data) @@ -490,7 +490,10 @@ def sample( # apply template if not skip_chat_template: prompt = tokenizer.apply_chat_template( - [{"role": "user", "content": prompt}], + [{ + "role": "user", + "content": prompt + }], add_generation_prompt=True, tokenize=False, ) @@ -501,8 +504,7 @@ def sample( prompt=prompt, prompt_len=prompt_len, expected_output_len=output_len, - ) - ) + )) self.maybe_oversample_requests(sampled_requests, num_requests) return sampled_requests diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index dc0ec3219486..2affd97e7d98 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -919,6 +919,7 @@ def main(args: argparse.Namespace): if args.result_filename: file_name = args.result_filename if args.result_dir: + os.makedirs(args.result_dir, exist_ok=True) file_name = os.path.join(args.result_dir, file_name) with open(file_name, "w", encoding='utf-8') as outfile: json.dump(result_json, outfile) From bd6008134ac8fb368f55ee264b7d71abb5130119 Mon Sep 17 00:00:00 2001 From: ekagra <3116519+ekagra-ranjan@users.noreply.github.com> Date: Tue, 27 May 2025 22:22:31 +0000 Subject: [PATCH 06/12] add pandas --- vllm/benchmarks/datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index d955aae20031..36c0b6a8f76e 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -23,6 +23,7 @@ from typing import Any, Callable, Optional, Union import numpy as np +import pandas as pd from PIL import Image from transformers import PreTrainedTokenizerBase From 3e7c7da9bde96b26947e579cb77f16313e2e3bf6 Mon Sep 17 00:00:00 2001 From: ekagra <3116519+ekagra-ranjan@users.noreply.github.com> Date: 
Wed, 28 May 2025 19:22:36 +0000 Subject: [PATCH 07/12] fix bug save detailed --- benchmarks/benchmark_serving.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 46b2efd4504a..ecbe1658cdbd 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -844,6 +844,8 @@ def main(args: argparse.Namespace): ]: if field in result_json: del result_json[field] + if field in benchmark_result: + del benchmark_result[field] # Save to file base_model_id = model_id.split("/")[-1] @@ -856,6 +858,7 @@ def main(args: argparse.Namespace): if args.result_filename: file_name = args.result_filename if args.result_dir: + os.makedirs(args.result_dir, exist_ok=True) file_name = os.path.join(args.result_dir, file_name) with open( file_name, mode="a+" if args.append_result else "w", encoding="utf-8" From 4b0b40b1e3db4c9f4cf3286c53c292a9a524e421 Mon Sep 17 00:00:00 2001 From: ekagra <3116519+ekagra-ranjan@users.noreply.github.com> Date: Wed, 28 May 2025 19:36:21 +0000 Subject: [PATCH 08/12] add more check --- benchmarks/benchmark_dataset.py | 11 +++++++++++ vllm/benchmarks/datasets.py | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py index 1f5f3ff565a9..5ff0c9b2ed1a 100644 --- a/benchmarks/benchmark_dataset.py +++ b/benchmarks/benchmark_dataset.py @@ -473,6 +473,17 @@ def load_data(self) -> None: # Load the JSONL file if self.dataset_path.endswith(".jsonl"): jsonl_data = pd.read_json(path_or_buf=self.dataset_path, lines=True) + + # check if the JSONL file has a 'prompt' column + if "prompt" not in jsonl_data.columns: + raise ValueError( + "JSONL file must contain a 'prompt' column." + ) + + # Convert each row to a dictionary and append to self.data + # This will convert the DataFrame to a list of dictionaries + # where each dictionary corresponds to a row in the DataFrame. 
+ # This is the standardized format we want for self.data for _, row in jsonl_data.iterrows(): self.data.append(row.to_dict()) else: diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index b3e1ae32e4cb..b4073fe249a3 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -476,6 +476,17 @@ def load_data(self) -> None: if self.dataset_path.endswith(".jsonl"): jsonl_data = pd.read_json(path_or_buf=self.dataset_path, lines=True) + + # check if the JSONL file has a 'prompt' column + if "prompt" not in jsonl_data.columns: + raise ValueError( + "JSONL file must contain a 'prompt' column." + ) + + # Convert each row to a dictionary and append to self.data + # This will convert the DataFrame to a list of dictionaries + # where each dictionary corresponds to a row in the DataFrame. + # This is the standardized format we want for self.data for _, row in jsonl_data.iterrows(): self.data.append(row.to_dict()) else: From 005523d872172b63b7011de32040c911dc54fbef Mon Sep 17 00:00:00 2001 From: ekagra <3116519+ekagra-ranjan@users.noreply.github.com> Date: Wed, 28 May 2025 19:54:09 +0000 Subject: [PATCH 09/12] add doc --- benchmarks/README.md | 48 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/benchmarks/README.md b/benchmarks/README.md index ecab570bb31c..7a19eecf3191 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -16,6 +16,12 @@ become available. + + Custom + ✅ + ✅ + Local file: data.jsonl + ShareGPT ✅ @@ -124,6 +130,38 @@ P99 ITL (ms): 8.39 ================================================== ``` +### Custom Dataset +If the dataset you want to benchmark is not yet supported in vLLM, you can still benchmark on it using `CustomDataset`.
Your data needs to be in `.jsonl` format and needs to have a "prompt" field per entry, e.g., data.jsonl + +``` +{"prompt": "What is the capital of India?"} +{"prompt": "What is the capital of Iran?"} +{"prompt": "What is the capital of China?"} +``` + +```bash +# start server +VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.1-8B-Instruct --disable-log-requests +``` + +```bash +# run benchmarking script +python3 benchmarks/benchmark_serving.py --port 9001 --save-result --save-detailed \ + --backend vllm \ + --model meta-llama/Llama-3.1-8B-Instruct \ + --endpoint /v1/completions \ + --dataset-name custom \ + --dataset-path data.jsonl \ + --custom-skip-chat-template \ + --num-prompts 80 \ + --max-concurrency 1 \ + --temperature=0.3 \ + --top-p=0.75 \ + --result-dir "./log/" +``` + +You can skip applying the chat template if your data already has it by using `--custom-skip-chat-template`. + ### VisionArena Benchmark for Vision Language Models ```bash @@ -203,6 +241,16 @@ python3 vllm/benchmarks/benchmark_serving.py \ --seed 42 ``` +**`philschmid/mt-bench`** + +``` bash +python3 vllm/benchmarks/benchmark_serving.py \ + --model Qwen/QwQ-32B \ + --dataset-name hf \ + --dataset-path philschmid/mt-bench \ + --num-prompts 80 +``` + ### Running With Sampling Parameters When using OpenAI-compatible backends such as `vllm`, optional sampling From 40dfca3257d4a10924ff377ddf65da2be45b20da Mon Sep 17 00:00:00 2001 From: ekagra <3116519+ekagra-ranjan@users.noreply.github.com> Date: Wed, 28 May 2025 19:54:39 +0000 Subject: [PATCH 10/12] lint --- benchmarks/benchmark_dataset.py | 8 +++----- vllm/benchmarks/datasets.py | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py index 5ff0c9b2ed1a..d86bf045ea47 100644 --- a/benchmarks/benchmark_dataset.py +++ b/benchmarks/benchmark_dataset.py @@ -473,13 +473,11 @@ def load_data(self) -> None: # Load the JSONL file if self.dataset_path.endswith(".jsonl"): jsonl_data = 
pd.read_json(path_or_buf=self.dataset_path, lines=True) - + # check if the JSONL file has a 'prompt' column if "prompt" not in jsonl_data.columns: - raise ValueError( - "JSONL file must contain a 'prompt' column." - ) - + raise ValueError("JSONL file must contain a 'prompt' column.") + # Convert each row to a dictionary and append to self.data # This will convert the DataFrame to a list of dictionaries # where each dictionary corresponds to a row in the DataFrame. diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index b4073fe249a3..35cc303f60ee 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -476,13 +476,11 @@ def load_data(self) -> None: if self.dataset_path.endswith(".jsonl"): jsonl_data = pd.read_json(path_or_buf=self.dataset_path, lines=True) - + # check if the JSONL file has a 'prompt' column if "prompt" not in jsonl_data.columns: - raise ValueError( - "JSONL file must contain a 'prompt' column." - ) - + raise ValueError("JSONL file must contain a 'prompt' column.") + # Convert each row to a dictionary and append to self.data # This will convert the DataFrame to a list of dictionaries # where each dictionary corresponds to a row in the DataFrame. 
From db5f71a5694cbc6a168511919122caeabe9f9215 Mon Sep 17 00:00:00 2001 From: ekagra <3116519+ekagra-ranjan@users.noreply.github.com> Date: Wed, 28 May 2025 19:56:57 +0000 Subject: [PATCH 11/12] fix save detail in serve.py --- vllm/benchmarks/serve.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index 4b024be221ee..858a0c6a00e4 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -1110,6 +1110,8 @@ def main(args: argparse.Namespace): ]: if field in result_json: del result_json[field] + if field in benchmark_result: + del benchmark_result[field] # Save to file base_model_id = model_id.split("/")[-1] From 11d52d66c5eae0772e5de66f06bc1232449b1504 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Fri, 30 May 2025 10:01:18 -0700 Subject: [PATCH 12/12] format Signed-off-by: Roger Wang --- benchmarks/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index ba5ecc7db6d1..6f9fbb91cbd9 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -16,12 +16,6 @@ become available. - - Custom - ✅ - ✅ - Local file: data.jsonl - ShareGPT ✅ @@ -70,6 +64,12 @@ become available. ✅ lmms-lab/LLaVA-OneVision-Data, Aeala/ShareGPT_Vicuna_unfiltered + + Custom + ✅ + ✅ + Local file: data.jsonl +