Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions examples/text-generation/run_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,20 @@ def setup_parser(parser):
action="store_true",
help="Whether to use torch compiled model or not.",
)
# Opt-in flag for loading models whose modeling code lives on the Hub.
# NOTE: adjacent string literals are concatenated verbatim, so every fragment
# of the help text must carry its own trailing space.
parser.add_argument(
    "--trust_remote_code",
    action="store_true",
    help="Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
    "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
    "execute code present on the Hub on your local machine.",
)
# Boolean switch (off by default). In the hunks shown on this page, setting it
# skips `adapt_transformers_to_gaudi()` and calls `model.generate` without the
# generation_config / lazy_mode / hpu_graphs / profiling keyword arguments.
parser.add_argument(
    "--default_transformers",
    help="Use Hugging Face transformers implementation",
    action="store_true",
)


parser.add_argument("--temperature", default=1.0, type=float, help="Temperature value for text generation")
parser.add_argument("--top_p", default=1.0, type=float, help="Top_p value for generating text via sampling")

Expand Down Expand Up @@ -309,14 +323,17 @@ def generate(size=None, reduce_recompile=False):
if torch.is_tensor(input_tokens[t]):
input_tokens[t] = input_tokens[t].to(args.device)

outputs = model.generate(
**input_tokens,
generation_config=generation_config,
lazy_mode=use_lazy_mode,
hpu_graphs=args.use_hpu_graphs,
profiling_steps=args.profiling_steps,
profiling_warmup_steps=args.profiling_warmup_steps,
).cpu()
if args.default_transformers:
outputs = model.generate(**input_tokens).cpu()
else:
outputs = model.generate(
**input_tokens,
generation_config=generation_config,
lazy_mode=use_lazy_mode,
hpu_graphs=args.use_hpu_graphs,
profiling_steps=args.profiling_steps,
profiling_warmup_steps=args.profiling_warmup_steps,
).cpu()
return tokenizer.batch_decode(outputs, skip_special_tokens=True)

from optimum.habana.utils import HabanaProfile
Expand Down Expand Up @@ -489,14 +506,17 @@ def generate_dataset(batch):
if torch.is_tensor(batch[t]):
batch[t] = batch[t].to(args.device)
# Generate new sequences
outputs = model.generate(
**batch,
generation_config=generation_config,
lazy_mode=use_lazy_mode,
hpu_graphs=args.use_hpu_graphs,
profiling_steps=args.profiling_steps,
profiling_warmup_steps=args.profiling_warmup_steps,
).cpu()
if args.default_transformers:
outputs = model.generate(**batch).cpu()
else:
outputs = model.generate(
**batch,
generation_config=generation_config,
lazy_mode=use_lazy_mode,
hpu_graphs=args.use_hpu_graphs,
profiling_steps=args.profiling_steps,
profiling_warmup_steps=args.profiling_warmup_steps,
).cpu()
return prompt, outputs

# warmup
Expand Down
13 changes: 7 additions & 6 deletions examples/text-generation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,11 @@ def setup_env(args):
os.environ.setdefault("PT_HPU_LAZY_ACC_PAR_MODE", "0")
os.environ.setdefault("PT_HPU_ENABLE_LAZY_COLLECTIVES", "true")

# Tweak generation so that it runs faster on Gaudi
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
if not args.default_transformers:
# Tweak generation so that it runs faster on Gaudi
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

adapt_transformers_to_gaudi()
adapt_transformers_to_gaudi()


def setup_device(args):
Expand Down Expand Up @@ -164,7 +165,7 @@ def setup_model(args, model_dtype, model_kwargs, logger):
if args.peft_model is not None:
model = peft_model(args, model_dtype, logger, **model_kwargs)
else:
model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs)
model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, trust_remote_code=args.trust_remote_code, **model_kwargs)
if args.quant_config:
import habana_quantization_toolkit

Expand All @@ -187,7 +188,7 @@ def setup_distributed_model(args, model_dtype, model_kwargs, logger):

logger.info("DeepSpeed is enabled.")
deepspeed.init_distributed(dist_backend="hccl")
config = AutoConfig.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs)
config = AutoConfig.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, trust_remote_code=args.trust_remote_code, **model_kwargs)
load_to_meta = model_on_meta(config)

if load_to_meta:
Expand Down Expand Up @@ -220,7 +221,7 @@ def setup_distributed_model(args, model_dtype, model_kwargs, logger):
model = peft_model(args, model_dtype, logger, **model_kwargs)
else:
model = AutoModelForCausalLM.from_pretrained(
args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs
args.model_name_or_path, torch_dtype=model_dtype, trust_remote_code=args.trust_remote_code, **model_kwargs
)
model.eval()

Expand Down