4
4
import random
5
5
from abc import ABC , abstractmethod
6
6
7
+ import numpy as np
8
+
7
9
from aiperf .common .config import UserConfig
8
10
from aiperf .common .enums import ModelSelectionStrategy
9
11
from aiperf .common .mixins import AIPerfLoggerMixin
@@ -29,6 +31,10 @@ def __init__(self, config: UserConfig, tokenizer: Tokenizer, **kwargs):
29
31
# Initialize sequence distribution
30
32
self ._seq_distribution = config .input .prompt .get_sequence_distribution ()
31
33
34
+ # Initialize RNG for sequence distribution sampling (avoid reseeding on each sample)
35
+ seed = getattr (self .config .input , "random_seed" , None )
36
+ self ._seq_rng = np .random .default_rng (seed ) if seed is not None else None
37
+
32
38
@abstractmethod
33
39
def create_dataset (self ) -> list [Conversation ]:
34
40
"""
@@ -73,9 +79,8 @@ def _sample_sequence_lengths(self) -> tuple[int, int]:
73
79
or max (128 , self .config .input .prompt .input_tokens .mean // 2 ),
74
80
)
75
81
76
- # Use random seed from config if available for reproducible results
77
- random_seed = getattr (self .config .input , "random_seed" , None )
78
- return self ._seq_distribution .sample (random_state = random_seed )
82
+ # Use pre-seeded RNG to avoid reseeding on each sample
83
+ return self ._seq_distribution .sample (random_state = self ._seq_rng )
79
84
80
85
def _set_max_tokens (self , turn : Turn ) -> None :
81
86
"""Set max_tokens for the turn based on the sequence distribution or output configuration.
0 commit comments