Commit 4f41058: update readme
morgandu committed Mar 30, 2024
1 parent 71c111c commit 4f41058
Showing 2 changed files with 27 additions and 15 deletions.
benchmarks/README.md: 16 changes (10 additions, 6 deletions)
@@ -1,31 +1,33 @@
 # JetStream Benchmark And Eval
 
-## Install Dependencies
+## Install Dependencies
 
 ```
 cd ~/JetStream/benchmarks
 pip install -r requirements.in
 ```
 
-## Benchmark
+## Benchmark
 
 ### Prepare DataSet
 
 ```
 cd ~/data
 wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
-```
+```
 
 ### Run Benchmark with maxtext tokenizer
 
 ```
 python benchmark_serving.py \
 --tokenizer /home/{username}/maxtext/assets/tokenizer \
 --num-prompts 10 \
---dataset ~/data/ShareGPT_V3_unfiltered_cleaned_split.json
-```
+--dataset sharegpt \
+--dataset-path ~/data/ShareGPT_V3_unfiltered_cleaned_split.json \
+--max-output-length 1024
+```

### Save request outputs in Benchmark

@@ -35,7 +37,9 @@ Please use --save-request-outputs flag to enable this feature.
 python benchmark_serving.py \
 --tokenizer /home/{username}/maxtext/assets/tokenizer \
 --num-prompts 10 \
---dataset ~/data/ShareGPT_V3_unfiltered_cleaned_split.json \
+--dataset sharegpt \
+--dataset-path ~/data/ShareGPT_V3_unfiltered_cleaned_split.json \
 --max-output-length 1024 \
 --save-request-outputs
 ```
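The substantive change in both commands is the flag split: `--dataset` now names a dataset loader while the new `--dataset-path` carries the file location, and `--max-output-length` caps generation. Judging from the `args.dataset == "sharegpt"` dispatch visible in `main()` below, an OpenOrca run would presumably use `--dataset openorca`; the dataset file name here is a placeholder, not a path taken from this commit:

```
python benchmark_serving.py \
--tokenizer /home/{username}/maxtext/assets/tokenizer \
--num-prompts 10 \
--dataset openorca \
--dataset-path ~/data/<openorca_dataset_file> \
--max-output-length 1024
```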
benchmarks/benchmark_serving.py: 26 changes (17 additions, 9 deletions)
@@ -35,14 +35,22 @@
 (run with real model and engines)
 python -m benchmarks.benchmark_serving \
---tokenizer <your_tokenizer> --dataset <target_dataset_path> \
+--tokenizer <your_tokenizer> \
+--dataset <target_dataset_name> \
+--dataset-path <target_dataset_path> \
 --request-rate <request_rate>
 (run with mock)
 python -m benchmarks.benchmark_serving \
 --request-rate 1
-e2e example: python3 benchmark_serving.py --tokenizer /home/rwitten/maxtext/assets/tokenizer --num-prompts 100 --dataset ~/ShareGPT_V3_unfiltered_cleaned_split.json
+e2e example:
+python3 benchmark_serving.py \
+--tokenizer /home/{username}/maxtext/assets/tokenizer \
+--num-prompts 100 \
+--dataset sharegpt \
+--dataset-path ~/ShareGPT_V3_unfiltered_cleaned_split.json
 """


@@ -177,9 +185,9 @@ def load_sharegpt_dataset(
   if max_output_length is None:
     print("In InputRequest, pass in actual output_length for each sample")
   else:
-    print("In InputRequest, pass in max_output_length: {max_output_length} for each sample")
+    print(f"In InputRequest, pass in max_output_length: {max_output_length} for each sample")
 
-  print(f"The dataset contains {len(dataset)} samples.")
+  print(f"The dataset contains {len(tokenized_dataset)} samples.")
   print(f"The filtered dataset contains {len(filtered_dataset)} samples.")
 
   return filtered_dataset
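The fix on the first `print` is the missing `f` prefix: without it, Python prints the braces and variable name literally instead of interpolating the value. A two-line illustration:

```python
max_output_length = 1024
print("max_output_length: {max_output_length}")   # prints the braces literally
print(f"max_output_length: {max_output_length}")  # prints: max_output_length: 1024
```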
@@ -188,7 +196,7 @@ def load_sharegpt_dataset(
 def load_openorca_dataset(
     dataset_path: str,
     tokenizer: Any,
-    max_output_length: int = None,
+    max_output_length: Optional[int] = None,
 ) -> List[InputRequest]:
 
   # Load the dataset.
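The signature change is a typing correction rather than a behavior change: `max_output_length: int = None` pairs an `int` annotation with a `None` default, which modern type checkers such as mypy reject by default, while `Optional[int]` states the nullable default explicitly. A minimal sketch of the corrected pattern (this function is illustrative, not from the diff):

```python
from typing import Optional


def cap_length(output_len: int, max_output_length: Optional[int] = None) -> int:
  """Return output_len, capped when a maximum is given."""
  if max_output_length is None:
    return output_len
  return min(output_len, max_output_length)
```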
@@ -221,9 +229,9 @@ def load_openorca_dataset(
   if max_output_length is None:
     print("In InputRequest, pass in actual output_length for each sample")
   else:
-    print("In InputRequest, pass in max_output_length: {max_output_length} for each sample")
+    print(f"In InputRequest, pass in max_output_length: {max_output_length} for each sample")
 
-  print(f"The dataset contains {len(dataset)} samples.")
+  print(f"The dataset contains {len(tokenized_dataset)} samples.")
   print(f"The filtered dataset contains {len(filtered_dataset)} samples.")
 
   return filtered_dataset
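The other repeated fix reports `len(tokenized_dataset)` instead of `len(dataset)`, presumably because the two collections can diverge once samples are dropped during tokenization. A toy illustration of why the old message could over-count (the filtering rule here is invented for the example):

```python
dataset = ["hello there", "", "ok", ""]                # raw entries from the JSON file
tokenized_dataset = [s.split() for s in dataset if s]  # empty conversations dropped
filtered_dataset = [t for t in tokenized_dataset if len(t) > 1]

print(f"The dataset contains {len(tokenized_dataset)} samples.")          # 2, not 4
print(f"The filtered dataset contains {len(filtered_dataset)} samples.")  # 1
```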
@@ -484,22 +492,22 @@ def main(args: argparse.Namespace):
     dataset = load_openorca_dataset(
         args.dataset_path,
         tokenizer,
         args.max_output_length
     )
   elif args.dataset == "sharegpt":
     dataset = load_sharegpt_dataset(
         args.dataset_path,
         tokenizer,
         args.conversation_starter,
         args.max_output_length
     )
 
-  filtered_dataset = filter_dataset(dataset)
+  # A given args.max_output_length value is the max generation step,
+  # when the args.max_output_length is default to None, the sample's golden output length
+  # will be used to decide the generation step
   input_requests = sample_requests(
       dataset,
       args.num_prompts,
       args.max_output_length
   )
 
   if args.warmup_first:
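The new comment documents the two modes of `args.max_output_length`: a concrete value caps every request's generation steps, while the default `None` falls back to each sample's golden output length. A hedged sketch of that rule, with the `InputRequest` fields inferred from the printed messages rather than taken from this diff:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class InputRequest:
  prompt: str
  prompt_len: int
  output: str      # golden (reference) completion
  output_len: int  # token length of the golden completion


def generation_steps(request: InputRequest, max_output_length: Optional[int]) -> int:
  """Decode steps for one request under the rule described in the comment."""
  if max_output_length is None:
    return request.output_len  # follow the sample's golden output length
  return max_output_length     # uniform cap from --max-output-length
```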