diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py index 14e9712595..e7e7366908 100644 --- a/examples/text-generation/run_generation.py +++ b/examples/text-generation/run_generation.py @@ -232,6 +232,20 @@ def setup_parser(parser): action="store_true", help="Whether to use torch compiled model or not.", ) + parser.add_argument( + "--trust_remote_code", + action="store_true", + help="Whether or not to allow for custom models defined on the Hub in their own modeling files. This option " + "should only be set to `True` for repositories you trust and in which you have read the code, as it will " + "execute code present on the Hub on your local machine.", + ) + parser.add_argument( + "--default_transformers", + action="store_true", + help="Use Hugging Face transformers implementation", + ) + + + parser.add_argument("--temperature", default=1.0, type=float, help="Temperature value for text generation") parser.add_argument("--top_p", default=1.0, type=float, help="Top_p value for generating text via sampling") @@ -309,14 +323,17 @@ def generate(size=None, reduce_recompile=False): if torch.is_tensor(input_tokens[t]): input_tokens[t] = input_tokens[t].to(args.device) - outputs = model.generate( - **input_tokens, - generation_config=generation_config, - lazy_mode=use_lazy_mode, - hpu_graphs=args.use_hpu_graphs, - profiling_steps=args.profiling_steps, - profiling_warmup_steps=args.profiling_warmup_steps, - ).cpu() + if args.default_transformers: + outputs = model.generate(**input_tokens).cpu() + else: + outputs = model.generate( + **input_tokens, + generation_config=generation_config, + lazy_mode=use_lazy_mode, + hpu_graphs=args.use_hpu_graphs, + profiling_steps=args.profiling_steps, + profiling_warmup_steps=args.profiling_warmup_steps, + ).cpu() return tokenizer.batch_decode(outputs, skip_special_tokens=True) from optimum.habana.utils import HabanaProfile @@ -489,14 +506,17 @@ def generate_dataset(batch): if 
torch.is_tensor(batch[t]): batch[t] = batch[t].to(args.device) # Generate new sequences - outputs = model.generate( - **batch, - generation_config=generation_config, - lazy_mode=use_lazy_mode, - hpu_graphs=args.use_hpu_graphs, - profiling_steps=args.profiling_steps, - profiling_warmup_steps=args.profiling_warmup_steps, - ).cpu() + if args.default_transformers: + outputs = model.generate(**batch).cpu() + else: + outputs = model.generate( + **batch, + generation_config=generation_config, + lazy_mode=use_lazy_mode, + hpu_graphs=args.use_hpu_graphs, + profiling_steps=args.profiling_steps, + profiling_warmup_steps=args.profiling_warmup_steps, + ).cpu() return prompt, outputs # warmup diff --git a/examples/text-generation/utils.py b/examples/text-generation/utils.py index 9b66de8128..f3ee52547e 100644 --- a/examples/text-generation/utils.py +++ b/examples/text-generation/utils.py @@ -125,10 +125,11 @@ def setup_env(args): os.environ.setdefault("PT_HPU_LAZY_ACC_PAR_MODE", "0") os.environ.setdefault("PT_HPU_ENABLE_LAZY_COLLECTIVES", "true") - # Tweak generation so that it runs faster on Gaudi - from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + if not args.default_transformers: + # Tweak generation so that it runs faster on Gaudi + from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - adapt_transformers_to_gaudi() + adapt_transformers_to_gaudi() def setup_device(args): @@ -164,7 +165,7 @@ def setup_model(args, model_dtype, model_kwargs, logger): if args.peft_model is not None: model = peft_model(args, model_dtype, logger, **model_kwargs) else: - model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs) + model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, trust_remote_code=args.trust_remote_code, **model_kwargs) if args.quant_config: import habana_quantization_toolkit @@ -187,7 +188,7 @@ def 
setup_distributed_model(args, model_dtype, model_kwargs, logger): logger.info("DeepSpeed is enabled.") deepspeed.init_distributed(dist_backend="hccl") - config = AutoConfig.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs) + config = AutoConfig.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, trust_remote_code=args.trust_remote_code, **model_kwargs) load_to_meta = model_on_meta(config) if load_to_meta: @@ -220,7 +221,7 @@ def setup_distributed_model(args, model_dtype, model_kwargs, logger): model = peft_model(args, model_dtype, logger, **model_kwargs) else: model = AutoModelForCausalLM.from_pretrained( - args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs + args.model_name_or_path, torch_dtype=model_dtype, trust_remote_code=args.trust_remote_code, **model_kwargs ) model.eval()