diff --git a/examples/text-generation/run_lm_eval.py b/examples/text-generation/run_lm_eval.py index 92ad6632e5..86e30f7bea 100644 --- a/examples/text-generation/run_lm_eval.py +++ b/examples/text-generation/run_lm_eval.py @@ -38,8 +38,6 @@ from transformers.generation import GenerationConfig from utils import finalize_quantization, initialize_model, save_model -from optimum.habana.utils import HabanaGenerationTime, get_hpu_memory_stats - os.environ.setdefault("TOKENIZERS_PARALLELISM", "false") logger = logging.getLogger(__name__) @@ -338,6 +336,11 @@ def main() -> None: args = setup_lm_eval_parser() model, _, tokenizer, generation_config = initialize_model(args, logger) + # Delayed import: optimum.habana.utils is imported here to ensure that + # environment variables and runtime configurations are properly initialized + # before loading modules that depend on them. + from optimum.habana.utils import HabanaGenerationTime, get_hpu_memory_stats + with torch.no_grad(): lm = HabanaModelAdapter(tokenizer, model, args, generation_config)