From 5a64857e34c38c214b60b810c96cc4fe8c01a16c Mon Sep 17 00:00:00 2001
From: Pramod Kumar <prkumar@habana.ai>
Date: Tue, 2 Jul 2024 17:00:17 +0300
Subject: [PATCH] [SW-191023][PyTorch][Optimum-Habana-fork]: Add flag to run
 inference with partial dataset

---
 examples/text-generation/run_generation.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py
index eb133b8ba7..279cfbd282 100755
--- a/examples/text-generation/run_generation.py
+++ b/examples/text-generation/run_generation.py
@@ -323,6 +323,11 @@ def __call__(self, parser, namespace, values, option_string=None):
         action="store_true",
         help="Whether or not to allow for custom models defined on the Hub in their own modeling files.",
     )
+    parser.add_argument(
+        "--run_partial_dataset",
+        action="store_true",
+        help="Run the inference with dataset for specified --n_iterations(default:5)",
+    )
     args = parser.parse_args()
 
     if args.torch_compile:
@@ -836,6 +841,8 @@ def generate_dataset(batch):
                 f"Output: {tokenizer.batch_decode(outputs, skip_special_tokens=True)[:args.batch_size*args.num_return_sequences]}"
             )
             print(separator)
+            if args.run_partial_dataset and args.n_iterations == i+1:
+                break
         t_end = time.time()
 
         throughput = total_new_tokens_generated / duration