diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py index 1be5108461..9af964a630 100644 --- a/examples/language-modeling/run_clm.py +++ b/examples/language-modeling/run_clm.py @@ -766,7 +766,7 @@ def compute_metrics(eval_preds): metrics = trainer.evaluate() if data_args.streaming: - metrics["eval_samples"] = max_eval_samples + metrics["eval_samples"] = training_args.max_steps * training_args.per_device_eval_batch_size else: max_eval_samples = ( data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)