Commit 4f41058: update readme
morgandu committed Mar 30, 2024
1 parent 71c111c commit 4f41058
Showing 2 changed files with 27 additions and 15 deletions.
benchmarks/README.md: 16 changes (10 additions, 6 deletions)
@@ -1,31 +1,33 @@
 # JetStream Benchmark And Eval
 
-## Install Dependencies
+## Install Dependencies
 
 ```
 cd ~/JetStream/benchmarks
 pip install -r requirements.in
 ```
 
-## Benchmark
+## Benchmark
 
 ### Prepare DataSet
 
 ```
 cd ~/data
 wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
-```
+```
 
 ### Run Benchmark with maxtext tokenizer
 
 ```
 python benchmark_serving.py \
 --tokenizer /home/{username}/maxtext/assets/tokenizer \
 --num-prompts 10 \
---dataset ~/data/ShareGPT_V3_unfiltered_cleaned_split.json
-```
+--dataset sharegpt \
+--dataset-path ~/data/ShareGPT_V3_unfiltered_cleaned_split.json \
+--max-output-length 1024
+```

### Save request outputs in Benchmark

@@ -35,7 +37,9 @@ Please use --save-request-outputs flag to enable this feature.
 python benchmark_serving.py \
 --tokenizer /home/{username}/maxtext/assets/tokenizer \
 --num-prompts 10 \
---dataset ~/data/ShareGPT_V3_unfiltered_cleaned_split.json \
+--dataset sharegpt \
+--dataset-path ~/data/ShareGPT_V3_unfiltered_cleaned_split.json \
 --max-output-length 1024 \
 --save-request-outputs
 ```
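The substantive change in both commands is the flag split: `--dataset` now names a dataset loader while the new `--dataset-path` carries the file location, and `--max-output-length` caps generation. Judging from the `args.dataset == "sharegpt"` dispatch visible in `main()` below, an OpenOrca run would presumably use `--dataset openorca`; the dataset file name here is a placeholder, not a path taken from this commit:

```
python benchmark_serving.py \
--tokenizer /home/{username}/maxtext/assets/tokenizer \
--num-prompts 10 \
--dataset openorca \
--dataset-path ~/data/<openorca_dataset_file> \
--max-output-length 1024
```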
benchmarks/benchmark_serving.py: 26 changes (17 additions, 9 deletions)
@@ -35,14 +35,22 @@
 (run with real model and engines)
 python -m benchmarks.benchmark_serving \
---tokenizer <your_tokenizer> --dataset <target_dataset_path> \
+--tokenizer <your_tokenizer> \
+--dataset <target_dataset_name> \
+--dataset-path <target_dataset_path> \
 --request-rate <request_rate>
 (run with mock)
 python -m benchmarks.benchmark_serving \
 --request-rate 1
-e2e example: python3 benchmark_serving.py --tokenizer /home/rwitten/maxtext/assets/tokenizer --num-prompts 100 --dataset ~/ShareGPT_V3_unfiltered_cleaned_split.json
+e2e example:
+python3 benchmark_serving.py \
+--tokenizer /home/{username}/maxtext/assets/tokenizer \
+--num-prompts 100 \
+--dataset sharegpt \
+--dataset-path ~/ShareGPT_V3_unfiltered_cleaned_split.json
 """


@@ -177,9 +185,9 @@ def load_sharegpt_dataset(
   if max_output_length is None:
     print("In InputRequest, pass in actual output_length for each sample")
   else:
-    print("In InputRequest, pass in max_output_length: {max_output_length} for each sample")
+    print(f"In InputRequest, pass in max_output_length: {max_output_length} for each sample")
 
-  print(f"The dataset contains {len(dataset)} samples.")
+  print(f"The dataset contains {len(tokenized_dataset)} samples.")
   print(f"The filtered dataset contains {len(filtered_dataset)} samples.")
 
   return filtered_dataset
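The fix on the first `print` is the missing `f` prefix: without it, Python prints the braces and variable name literally instead of interpolating the value. A two-line illustration:

```python
max_output_length = 1024
print("max_output_length: {max_output_length}")   # prints the braces literally
print(f"max_output_length: {max_output_length}")  # prints: max_output_length: 1024
```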
@@ -188,7 +196,7 @@ def load_sharegpt_dataset(
 def load_openorca_dataset(
     dataset_path: str,
     tokenizer: Any,
-    max_output_length: int = None,
+    max_output_length: Optional[int] = None,
 ) -> List[InputRequest]:
 
   # Load the dataset.
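The signature change is a typing correction rather than a behavior change: `max_output_length: int = None` pairs an `int` annotation with a `None` default, which modern type checkers such as mypy reject by default, while `Optional[int]` states the nullable default explicitly. A minimal sketch of the corrected pattern (this function is illustrative, not from the diff):

```python
from typing import Optional


def cap_length(output_len: int, max_output_length: Optional[int] = None) -> int:
  """Return output_len, capped when a maximum is given."""
  if max_output_length is None:
    return output_len
  return min(output_len, max_output_length)
```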
@@ -221,9 +229,9 @@ def load_openorca_dataset(
   if max_output_length is None:
     print("In InputRequest, pass in actual output_length for each sample")
   else:
-    print("In InputRequest, pass in max_output_length: {max_output_length} for each sample")
+    print(f"In InputRequest, pass in max_output_length: {max_output_length} for each sample")
 
-  print(f"The dataset contains {len(dataset)} samples.")
+  print(f"The dataset contains {len(tokenized_dataset)} samples.")
   print(f"The filtered dataset contains {len(filtered_dataset)} samples.")
 
   return filtered_dataset
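The other repeated fix reports `len(tokenized_dataset)` instead of `len(dataset)`, presumably because the two collections can diverge once samples are dropped during tokenization. A toy illustration of why the old message could over-count (the filtering rule here is invented for the example):

```python
dataset = ["hello there", "", "ok", ""]                # raw entries from the JSON file
tokenized_dataset = [s.split() for s in dataset if s]  # empty conversations dropped
filtered_dataset = [t for t in tokenized_dataset if len(t) > 1]

print(f"The dataset contains {len(tokenized_dataset)} samples.")          # 2, not 4
print(f"The filtered dataset contains {len(filtered_dataset)} samples.")  # 1
```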
@@ -484,22 +492,22 @@ def main(args: argparse.Namespace):
     dataset = load_openorca_dataset(
         args.dataset_path,
         tokenizer,
         args.max_output_length
     )
   elif args.dataset == "sharegpt":
     dataset = load_sharegpt_dataset(
         args.dataset_path,
         tokenizer,
         args.conversation_starter,
         args.max_output_length
     )
 
-  filtered_dataset = filter_dataset(dataset)
+  # A given args.max_output_length value is the max generation step,
+  # when the args.max_output_length is default to None, the sample's golden output length
+  # will be used to decide the generation step
   input_requests = sample_requests(
       dataset,
       args.num_prompts,
       args.max_output_length
   )
 
   if args.warmup_first:
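The new comment documents the two modes of `args.max_output_length`: a concrete value caps every request's generation steps, while the default `None` falls back to each sample's golden output length. A hedged sketch of that rule, with the `InputRequest` fields inferred from the printed messages rather than taken from this diff:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class InputRequest:
  prompt: str
  prompt_len: int
  output: str      # golden (reference) completion
  output_len: int  # token length of the golden completion


def generation_steps(request: InputRequest, max_output_length: Optional[int]) -> int:
  """Decode steps for one request under the rule described in the comment."""
  if max_output_length is None:
    return request.output_len  # follow the sample's golden output length
  return max_output_length     # uniform cap from --max-output-length
```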