NOTE(review): line structure restored from a whitespace-collapsed copy of this patch.
The collapse discarded the leading indentation of the hunk lines, so the indentation
below is reconstructed -- confirm against the target file, or apply with
`git apply --ignore-whitespace`.
Summary: hunk 1 registers a `--add_bos_token` store_true CLI flag; hunk 2 passes
`args.add_bos_token` as the `add_bos_token` keyword argument of the call that
precedes `results = evaluate(args)`.

diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py
index ee73e578cbd..49166a59e34 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py
@@ -40,6 +40,7 @@
     type=str,
     help="tasks list for accuracy validation",
 )
+parser.add_argument("--add_bos_token", action="store_true", help="whether to add bos token for accuracy validation.")
 # ============WeightOnlyQuant configs===============
 parser.add_argument("--woq", action="store_true")
 parser.add_argument(
@@ -387,7 +388,8 @@
                         model_args=model_args,
                         tasks = args.tasks,
                         device = "cpu",
-                        batch_size = args.eval_batch_size)
+                        batch_size = args.eval_batch_size,
+                        add_bos_token = args.add_bos_token,)
     results = evaluate(args)
     for task_name in args.tasks.split(","):
         if task_name == "wikitext":