diff --git a/agents/codex-1073.md b/agents/codex-1073.md new file mode 100644 index 00000000..678dfa89 --- /dev/null +++ b/agents/codex-1073.md @@ -0,0 +1 @@ + diff --git a/pa_core/cli.py b/pa_core/cli.py index 1474af79..22b5148f 100644 --- a/pa_core/cli.py +++ b/pa_core/cli.py @@ -737,7 +737,6 @@ def _emit_run_end() -> None: from .facade import run_single from .logging_utils import setup_json_logging from .manifest import ManifestWriter - from .random import spawn_agent_rngs_with_ids, spawn_rngs from .reporting.attribution import ( compute_sleeve_cvar_contribution, compute_sleeve_return_attribution, @@ -746,6 +745,7 @@ def _emit_run_end() -> None: ) from .reporting.sweep_excel import export_sweep_results from .run_flags import RunFlags + from .sim.simulation_initialization import initialize_sweep_rngs from .sleeve_suggestor import suggest_sleeve_sizes from .stress import apply_stress_preset from .sweep import run_parameter_sweep @@ -917,21 +917,29 @@ def _emit_run_end() -> None: # Capture raw params after user-driven config adjustments (mode/stress/suggestions) raw_params = cfg.model_dump() - substream_ids: dict[str, str] | None = None + substream_ids: Mapping[str, str] | None = None if ( cfg.analysis_mode in ["capital", "returns", "alpha_shares", "vol_mult"] and not args.sensitivity ): # Parameter sweep mode - rng_returns = spawn_rngs(args.seed, 1)[0] fin_agent_names = ["internal", "external_pa", "active_ext"] - fin_rngs, substream_ids = spawn_agent_rngs_with_ids( + rng_bundle = initialize_sweep_rngs( args.seed, - fin_agent_names, - legacy_order=args.legacy_agent_rng, + legacy_agent_rng=args.legacy_agent_rng, + financing_agents=fin_agent_names, + ) + rng_returns = rng_bundle.rng_returns + fin_rngs = rng_bundle.rngs_financing + substream_ids = rng_bundle.substream_ids + results = run_parameter_sweep( + cfg, + idx_series, + rng_returns, + fin_rngs, + seed=rng_bundle.seed, ) - results = run_parameter_sweep(cfg, idx_series, rng_returns, fin_rngs) sweep_metadata = {"rng_seed": args.seed, "substream_ids": substream_ids} export_sweep_results(results, filename=args.output, metadata=sweep_metadata) _record_artifact(args.output) diff --git a/pa_core/facade.py b/pa_core/facade.py index 64f00156..1db7bf98 100644 --- a/pa_core/facade.py +++ b/pa_core/facade.py @@ -218,7 +218,6 @@ def run_single( from .agents.registry import build_from_config from .backend import resolve_and_set_backend - from .random import spawn_agent_rngs_with_ids, spawn_rngs from .sim import draw_financing_series, draw_joint_returns from .sim.covariance import build_cov_matrix from .sim.metrics import summary_table @@ -233,6 +232,7 @@ def run_single( resolve_regime_start, simulate_regime_paths, ) + from .sim.simulation_initialization import initialize_run_rngs from .simulations import simulate_agents from .units import normalize_return_inputs from .validators import select_vol_regime_sigma @@ -297,11 +297,14 @@ def run_single( return_overrides=build_covariance_return_overrides(sigma_vec, corr_mat), ) - base_rng = spawn_rngs(run_options.seed, 1)[0] - child_seeds = base_rng.integers(0, 2**32, size=3, dtype="uint32") - rng_returns = spawn_rngs(int(child_seeds[0]), 1)[0] - rng_regime = spawn_rngs(int(child_seeds[1]), 1)[0] - fin_seed = int(child_seeds[2]) + rng_bundle = initialize_run_rngs( + run_options.seed, + legacy_agent_rng=run_options.legacy_agent_rng, + ) + rng_returns = rng_bundle.rng_returns + rng_regime = rng_bundle.rng_regime + fin_rngs = rng_bundle.rngs_financing + substream_ids = rng_bundle.substream_ids regime_params = None regime_paths = None regime_labels = None @@ -331,11 +334,6 @@ def run_single( regime_params=regime_params, ) corr_repair_info = params.get("_correlation_repair_info") - fin_rngs, substream_ids = spawn_agent_rngs_with_ids( - fin_seed, - ["internal", "external_pa", "active_ext"], - legacy_order=run_options.legacy_agent_rng, - ) f_int, f_ext, f_act = draw_financing_series( n_months=run_cfg.N_MONTHS, n_sim=run_cfg.N_SIMULATIONS, @@ -421,7 +419,7 @@ def run_sweep( import pandas as pd from .backend import resolve_and_set_backend - from .random import spawn_agent_rngs_with_ids, spawn_rngs + from .sim.simulation_initialization import initialize_sweep_rngs from .sweep import run_parameter_sweep, sweep_results_to_dataframe run_options = options or RunOptions() @@ -436,14 +434,14 @@ def run_sweep( elif not isinstance(idx_series, pd.Series): raise ValueError("Index data must be a pandas Series") - base_rng = spawn_rngs(run_options.seed, 1)[0] - child_seed = int(base_rng.integers(0, 2**32, dtype="uint32")) - rng_returns = spawn_rngs(child_seed, 1)[0] - fin_rngs, substream_ids = spawn_agent_rngs_with_ids( - child_seed, - ["internal", "external_pa", "active_ext"], - legacy_order=run_options.legacy_agent_rng, + rng_bundle = initialize_sweep_rngs( + run_options.seed, + legacy_agent_rng=run_options.legacy_agent_rng, ) + rng_returns = rng_bundle.rng_returns + fin_rngs = rng_bundle.rngs_financing + substream_ids = rng_bundle.substream_ids + child_seed = rng_bundle.seed results = run_parameter_sweep( run_cfg, idx_series, diff --git a/pa_core/sim/paths.py b/pa_core/sim/paths.py index 261567a0..f8d3c354 100644 --- a/pa_core/sim/paths.py +++ b/pa_core/sim/paths.py @@ -8,11 +8,11 @@ from numpy.typing import NDArray from ..backend import xp as np -from ..random import spawn_rngs from ..types import GeneratorLike from ..validators import NUMERICAL_STABILITY_EPSILON from .financing import draw_financing_series, simulate_financing from .params import CANONICAL_PARAMS_MARKER, CANONICAL_PARAMS_VERSION +from .simulation_initialization import ensure_rng __all__ = [ "simulate_financing", @@ -269,9 +269,7 @@ def prepare_mc_universe( raise ValueError("N_SIMULATIONS and N_MONTHS must be positive") if cov_mat.shape != (4, 4): raise ValueError("cov_mat must be 4×4 and ordered as [idx, H, E, M]") - if rng is None: - rng = spawn_rngs(seed, 1)[0] - assert rng is not None + rng = ensure_rng(seed, rng) distributions = _resolve_return_distributions(return_distribution, return_distributions) _validate_return_draw_settings(distributions, return_copula, return_t_df) mean = np.array([mu_idx, mu_H, mu_E, mu_M]) @@ -320,9 +318,7 @@ def prepare_return_shocks( rng: Optional[GeneratorLike] = None, ) -> Dict[str, Any]: """Pre-generate return shocks to reuse across parameter combinations.""" - if rng is None: - rng = spawn_rngs(seed, 1)[0] - assert rng is not None + rng = ensure_rng(seed, rng) distribution = params.get("return_distribution", "normal") dist_overrides = ( params.get("return_distribution_idx"), @@ -443,9 +439,7 @@ def draw_returns( shocks_out[..., i] = z[..., i] * (scale / denom) sims = μ + shocks_out * σ else: - if rng is None: - rng = spawn_rngs(seed, 1)[0] - assert rng is not None + rng = ensure_rng(seed, rng) if all(dist == "normal" for dist in distributions): Σ = corr * (σ[:, None] * σ[None, :]) sims = _safe_multivariate_normal(rng, μ, Σ, (n_sim, n_months)) @@ -507,8 +501,7 @@ def draw_joint_returns( When ``regime_params`` is provided, returns are drawn from regime-specific parameters based on ``regime_paths``. """ - if rng is None and seed is not None: - rng = spawn_rngs(seed, 1)[0] + rng = ensure_rng(seed, rng) if regime_params is None: return draw_returns( n_months=n_months, @@ -579,9 +572,7 @@ def simulate_alpha_streams( distributions = _resolve_return_distributions(return_distribution, return_distributions) _validate_return_draw_settings(distributions, return_copula, return_t_df) means = np.array([mu_idx, mu_H, mu_E, mu_M]) - if rng is None: - rng = spawn_rngs(seed, 1)[0] - assert rng is not None + rng = ensure_rng(seed, rng) if all(dist == "normal" for dist in distributions): return _safe_multivariate_normal(rng, means, cov, (T, 1))[:, 0, :] sigma = np.sqrt(np.clip(np.diag(cov), 0.0, None)) diff --git a/pa_core/sim/regimes.py b/pa_core/sim/regimes.py index 338208f0..9ed03988 100644 --- a/pa_core/sim/regimes.py +++ b/pa_core/sim/regimes.py @@ -6,10 +6,10 @@ from ..backend import xp as np from ..config import ModelConfig -from ..random import spawn_rngs from ..types import GeneratorLike from .covariance import build_cov_matrix from .params import build_simulation_params +from .simulation_initialization import ensure_rng def _cov_to_corr_and_sigma(cov: npt.NDArray[Any]) -> tuple[npt.NDArray[Any], npt.NDArray[Any]]: @@ -119,8 +119,7 @@ def simulate_regime_paths( n_regimes = int(transition_mat.shape[0]) if not 0 <= start_state < n_regimes: raise ValueError("start_state must be within regime index range") - if rng is None: - rng = spawn_rngs(seed, 1)[0] + rng = ensure_rng(seed, rng) paths = np.empty((n_sim, n_months), dtype=int) paths[:, 0] = start_state diff --git a/pa_core/sim/simulation_initialization.py b/pa_core/sim/simulation_initialization.py new file mode 100644 index 00000000..c4900baa --- /dev/null +++ b/pa_core/sim/simulation_initialization.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Mapping, Sequence + +from ..random import spawn_agent_rngs_with_ids, spawn_rngs +from ..types import GeneratorLike + +DEFAULT_FINANCING_AGENTS: tuple[str, ...] = ("internal", "external_pa", "active_ext") + + +@dataclass(slots=True) +class RunRNGBundle: + """RNG bundle for a single simulation run.""" + + rng_returns: GeneratorLike + rng_regime: GeneratorLike + rngs_financing: Mapping[str, GeneratorLike] + substream_ids: Mapping[str, str] + + +@dataclass(slots=True) +class SweepRNGBundle: + """RNG bundle for a parameter sweep run.""" + + rng_returns: GeneratorLike + rngs_financing: Mapping[str, GeneratorLike] + substream_ids: Mapping[str, str] + seed: int + + +def ensure_rng(seed: int | None, rng: GeneratorLike | None) -> GeneratorLike: + """Return a generator, creating a deterministic one from ``seed`` if needed.""" + if rng is not None: + return rng + return spawn_rngs(seed, 1)[0] + + +def initialize_run_rngs( + seed: int | None, + *, + financing_agents: Sequence[str] = DEFAULT_FINANCING_AGENTS, + legacy_agent_rng: bool = False, +) -> RunRNGBundle: + """Create per-run RNGs for returns, regimes, and financing.""" + base_rng = spawn_rngs(seed, 1)[0] + child_seeds = base_rng.integers(0, 2**32, size=3, dtype="uint32") + rng_returns = spawn_rngs(int(child_seeds[0]), 1)[0] + rng_regime = spawn_rngs(int(child_seeds[1]), 1)[0] + fin_seed = int(child_seeds[2]) + rngs_financing, substream_ids = spawn_agent_rngs_with_ids( + fin_seed, + financing_agents, + legacy_order=legacy_agent_rng, + ) + return RunRNGBundle( + rng_returns=rng_returns, + rng_regime=rng_regime, + rngs_financing=rngs_financing, + substream_ids=substream_ids, + ) + + +def initialize_sweep_rngs( + seed: int | None, + *, + financing_agents: Sequence[str] = DEFAULT_FINANCING_AGENTS, + legacy_agent_rng: bool = False, +) -> SweepRNGBundle: + """Create per-sweep RNGs derived from the run seed.""" + base_rng = spawn_rngs(seed, 1)[0] + child_seed = int(base_rng.integers(0, 2**32, dtype="uint32")) + rng_returns = spawn_rngs(child_seed, 1)[0] + rngs_financing, substream_ids = spawn_agent_rngs_with_ids( + child_seed, + financing_agents, + legacy_order=legacy_agent_rng, + ) + return SweepRNGBundle( + rng_returns=rng_returns, + rngs_financing=rngs_financing, + substream_ids=substream_ids, + seed=child_seed, + ) diff --git a/pa_core/sweep.py b/pa_core/sweep.py index d28bee23..495ed420 100644 --- a/pa_core/sweep.py +++ b/pa_core/sweep.py @@ -4,7 +4,7 @@ import hashlib import json import logging -from typing import Any, Callable, Dict, Iterator, List, Optional +from typing import Any, Callable, Dict, Iterator, List, Mapping, Optional import numpy as np import pandas as pd @@ -164,7 +164,7 @@ def run_parameter_sweep( cfg: ModelConfig, index_series: pd.Series, rng_returns: GeneratorLike, - fin_rngs: Dict[str, GeneratorLike], + fin_rngs: Mapping[str, GeneratorLike], seed: Optional[int] = None, progress: Optional[Callable[[int, int], None]] = None, ) -> List[SweepResult]: diff --git a/pr_body.md b/pr_body.md index f17c9518..f875f4b7 100644 --- a/pr_body.md +++ b/pr_body.md @@ -9,16 +9,16 @@ Isolate per-run RNG usage across simulations and regime switching, with deterministic tests for seeded runs. #### Tasks -- [x] Modify `pa_core/sim/paths.py` to replace module-level RNG usage with a local `np.random.Generator` instance. Update functions to accept a generator parameter if necessary. -- [x] Refactor `pa_core/sim/regimes.py` to eliminate module-level RNG usage by using a per-run `np.random.Generator` instance. Pass the generator as a parameter or instantiate one within the simulation run. -- [x] Update `pa_core/facade.py` to instantiate a new `np.random.Generator` at the start of each simulation run and ensure it is passed to all downstream functions. -- [x] Review and update existing tests in `tests/test_simulations.py` to explicitly validate RNG isolation by checking that simulations with the same seed produce identical results and those with different seeds produce different results. +- [x] Audit `pa_core/facade.py`, `pa_core/sim/paths.py`, and `pa_core/sim/regimes.py` to identify and replace any direct usage of global RNG functions with `np.random.Generator` instances. +- [x] Modify functions in `pa_core/facade.py`, `pa_core/sim/paths.py`, and `pa_core/sim/regimes.py` to accept an `np.random.Generator` parameter or instantiate a local generator using a deterministic seed when necessary. +- [x] Update the simulation initialization module to create a new, seeded `np.random.Generator` at the start of each simulation run and pass it to all downstream functions. +- [x] Enhance tests in `tests/test_simulations.py` to include cases that perturb the global RNG state and verify that simulation outcomes remain consistent when supplied the same seed. #### Acceptance criteria -- [x] All functions in `pa_core/sim/paths.py` that previously used module-level RNG now accept an `np.random.Generator` instance as a parameter or create one locally. -- [x] All functions in `pa_core/sim/regimes.py` that previously used module-level RNG now accept an `np.random.Generator` instance as a parameter or create one locally. -- [x] A new `np.random.Generator` is instantiated at the start of each simulation run in `pa_core/facade.py` and passed to all downstream functions. -- [x] Tests in `tests/test_simulations.py` validate that simulations with the same seed produce identical results and those with different seeds produce different results. +- [x] All functions in `pa_core/facade.py`, `pa_core/sim/paths.py`, and `pa_core/sim/regimes.py` use an `np.random.Generator` instance for random number generation. +- [x] Functions in `pa_core/facade.py`, `pa_core/sim/paths.py`, and `pa_core/sim/regimes.py` accept an `np.random.Generator` parameter or instantiate one locally with a deterministic seed. +- [x] A new `np.random.Generator` is instantiated at the start of each simulation run in `simulation_initialization.py` using a provided seed. +- [x] Tests in `tests/test_simulations.py` confirm that simulations with the same seed produce identical results, different seeds produce different results, and changes to the global RNG state do not affect results. ## Related Issues - [ ] _Not provided._ ## References diff --git a/tests/test_simulations.py b/tests/test_simulations.py index 6e4e4cbf..4b812b31 100644 --- a/tests/test_simulations.py +++ b/tests/test_simulations.py @@ -311,3 +311,53 @@ def test_run_sweep_is_deterministic_for_seed() -> None: pd.testing.assert_frame_equal(artifacts_a.summary, artifacts_c.summary) assert not artifacts_a.summary.equals(artifacts_b.summary) + + +def test_run_single_ignores_global_rng_state() -> None: + cfg = ModelConfig( + N_SIMULATIONS=4, + N_MONTHS=4, + financing_mode="broadcast", + return_unit="monthly", + mu_H=0.0, + sigma_H=0.01, + mu_E=0.0, + sigma_E=0.01, + mu_M=0.0, + sigma_M=0.01, + rho_idx_H=0.1, + rho_idx_E=0.05, + rho_idx_M=0.05, + rho_H_E=0.1, + rho_H_M=0.1, + rho_E_M=0.1, + ) + idx = pd.Series([0.01, -0.02, 0.015, 0.0]) + artifacts_a = run_single(cfg, idx, RunOptions(seed=123)) + + np.random.seed(999) + _ = np.random.random(1000) + + artifacts_b = run_single(cfg, idx, RunOptions(seed=123)) + for name, values in artifacts_a.returns.items(): + assert np.allclose(values, artifacts_b.returns[name]) + + +def test_run_sweep_ignores_global_rng_state() -> None: + cfg = load_config("examples/scenarios/my_first_scenario.yml").model_copy( + update={"N_SIMULATIONS": 2, "N_MONTHS": 3} + ) + idx = pd.Series([0.01, -0.02, 0.015]) + sweep_params = { + "analysis_mode": "vol_mult", + "sd_multiple_min": 1.0, + "sd_multiple_max": 1.0, + "sd_multiple_step": 1.0, + } + + artifacts_a = run_sweep(cfg, idx, sweep_params, RunOptions(seed=123)) + np.random.seed(444) + _ = np.random.random(500) + artifacts_b = run_sweep(cfg, idx, sweep_params, RunOptions(seed=123)) + + pd.testing.assert_frame_equal(artifacts_a.summary, artifacts_b.summary)