From 1257ec8041e7df00837f982b536348187dc3cea7 Mon Sep 17 00:00:00 2001 From: stranske-automation-bot Date: Mon, 23 Feb 2026 18:21:22 +0000 Subject: [PATCH 01/16] chore(ledger): start task task-01 for issue #227 --- .agents/issue-227-ledger.yml | 367 +++++++++++++++++++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 .agents/issue-227-ledger.yml diff --git a/.agents/issue-227-ledger.yml b/.agents/issue-227-ledger.yml new file mode 100644 index 00000000..6db6bf18 --- /dev/null +++ b/.agents/issue-227-ledger.yml @@ -0,0 +1,367 @@ +version: 1 +issue: 227 +base: main +branch: codex/issue-227 +tasks: + - id: task-01 + title: Add `mapping_diff_report` console script entrypoint to `pyproject.toml` + under `[project.scripts]` pointing to `.cli.mapping_diff_report:main` + status: doing + started_at: '2026-02-23T18:21:21Z' + finished_at: null + commit: '' + notes: [] + - id: task-02 + title: Create `src//cli/mapping_diff_report.py` with argument parser + that supports `--help` flag + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-03 + title: Implement error handling that exits non-zero and writes single-line stderr + message including `config/name_registry.yml` path when registry is missing or + unreadable + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-04 + title: Wire the CLI main function to call the report generator and write output + to stdout + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-05 + title: Add exit code logic to return zero on success and non-zero on fatal errors + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-06 + title: Create `src//reports/mapping_diff.py` with a callable report generator + function signature that accepts registry path and input sources + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-07 + title: Implement registry loading logic within the report generator using existing + config patterns + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-08 + title: Implement input scanning logic that identifies unmapped names and fallback-mapped + names from normalization and reconciliation sources + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-09 + title: Implement deterministic `UNMAPPED` section generation that lists raw input + names not present in registry + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-10 + title: Implement deterministic `FALLBACK_MAPPED` section generation that lists + input names resolved via fallback with their canonical names + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-11 + title: Implement deterministic `SUGGESTIONS` section generation that provides + canonical name suggestions for every unmapped entry using title-case transformation + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-12 + title: Identify the specific normalization and reconciliation functions that perform + counterparty name resolution + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-13 + title: Modify the name resolution logic to consult the registry for direct canonical + and alias lookups before applying hardcoded mappings + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-14 + title: Extend the return type or object from name resolution functions to include + a `source` field indicating `registry` or `fallback` origin + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-15 + title: Update all call sites of the modified resolution functions to handle the + new `source` field in the return value + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-16 + title: Add unit test in `tests/test_mapping_diff_report_cli.py` that verifies + `mapping_diff_report --help` exits with status zero and prints usage text containing + `mapping_diff_report` string + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-17 + title: Add unit test that verifies missing `config/name_registry.yml` causes non-zero + exit and single-line stderr message containing the registry path + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-18 + title: Add unit test that verifies unreadable `config/name_registry.yml` causes + non-zero exit and appropriate stderr message + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-19 + title: Add unit test that verifies deterministic output against fixed fixtures + contains all three required sections with expected content + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-20 + title: Add integration test in `tests/test_normalization_registry_first.py` using + `name_registry_before.yml` that verifies at least one fixture input resolves + via fallback and appears in `FALLBACK_MAPPED` section + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-21 + title: Add integration test using `name_registry_after.yml` with same inputs that + verifies previously fallback-mapped name now resolves via registry + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-22 + title: Add integration test that captures logs when using `name_registry_after.yml` + and asserts no warning messages contain the previously fallback-mapped raw name + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-23 + title: Add integration test that verifies `mapping_diff_report` output changes + between before and after registry states for the same input set + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-24 + title: Create `tests/fixtures/name_registry_before.yml` with at least one missing + alias that will trigger fallback resolution + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-25 + title: Create `tests/fixtures/name_registry_after.yml` with the previously missing + alias added + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-26 + title: Add normalization/reconciliation input fixtures required for integration + tests + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-27 + title: Update fixture loading in tests to use explicit fixture selection via temp + working dir or dependency injection (not real `config/name_registry.yml`) + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-28 + title: 'Update documentation (README.md or docs page) to include ordered workflow: + (1) edit `config/name_registry.yml`, (2) run `mapping_diff_report`, (3) interpret + `UNMAPPED`, `FALLBACK_MAPPED`, `SUGGESTIONS` sections' + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-29 + title: Document which files in the current PR branch diff are registry/report/normalization/tests/docs-related + and which are unrelated, then create a separate branch containing only the in-scope + changes + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-30 + title: '`pyproject.toml` defines a `[project.scripts]` console entrypoint named + `mapping_diff_report`' + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-31 + title: Running `mapping_diff_report --help` exits with status code `0` and prints + usage text that includes the string `mapping_diff_report` + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-32 + title: If `config/name_registry.yml` is missing or unreadable, `mapping_diff_report` + exits non-zero and writes a single-line error message to stderr that includes + `config/name_registry.yml` + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-33 + title: '`src//reports/mapping_diff.py` exists and can be imported without + performing IO at import time' + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-34 + title: 'With fixed fixture registry + fixed fixture normalization/reconciliation + inputs, `mapping_diff_report` output is deterministic and contains three labeled + sections: `UNMAPPED`, `FALLBACK_MAPPED`, and `SUGGESTIONS`' + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-35 + title: The `UNMAPPED` section lists each input name not present in the registry + fixture one per line and prints the raw input name exactly as encountered + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-36 + title: The `FALLBACK_MAPPED` section lists each input name resolved by fallback + logic (not registry alias) and includes both the raw input name and resolved + canonical name on each line + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-37 + title: The `SUGGESTIONS` section includes a non-empty suggested canonical name + for every entry in `UNMAPPED`, and each suggestion line follows the format ` + -> ` where `suggested_canonical_name` is generated + using title-case transformation + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-38 + title: Normalization/reconciliation code consults the name registry before any + hardcoded/fallback mappings and records mapping source as `registry` or `fallback` + per mapped name + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-39 + title: 'Integration scenario A: using `tests/fixtures/name_registry_before.yml`, + at least one fixture input resolves via fallback and `mapping_diff_report` lists + it under `FALLBACK_MAPPED`' + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-40 + title: 'Integration scenario B: using `tests/fixtures/name_registry_after.yml` + (same inputs), the previously fallback-mapped name resolves via registry and + does not appear in `FALLBACK_MAPPED` or `UNMAPPED` in `mapping_diff_report` + output' + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-41 + title: When using `tests/fixtures/name_registry_after.yml`, the normalization/reconciliation + run emits no warning log messages containing the previously fallback-mapped + raw name + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-42 + title: 'A documentation file (README.md or a docs page) contains an explicit ordered + workflow with the literal steps: (1) edit `config/name_registry.yml`, (2) run + `mapping_diff_report`, (3) interpret `UNMAPPED`, `FALLBACK_MAPPED`, and `SUGGESTIONS` + sections' + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] + - id: task-43 + title: 'The PR branch diff contains only files matching these patterns: `src//cli/*`, + `src//reports/*`, `src//name_registry.py`, `tests/test_*registry*.py`, + `tests/test_*mapping_diff*.py`, `tests/fixtures/name_registry*.yml`, `config/name_registry.yml`, + `pyproject.toml` (scripts section only), `README.md` or `docs/*.md`' + status: todo + started_at: null + finished_at: null + commit: '' + notes: [] From 3d7008728f3025b5aa6171cf017f28403623e5db Mon Sep 17 00:00:00 2001 From: stranske-automation-bot Date: Mon, 23 Feb 2026 18:21:38 +0000 Subject: [PATCH 02/16] chore(ledger): finish task task-01 for issue #227 --- .agents/issue-227-ledger.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.agents/issue-227-ledger.yml b/.agents/issue-227-ledger.yml index 6db6bf18..b5141404 100644 --- a/.agents/issue-227-ledger.yml +++ b/.agents/issue-227-ledger.yml @@ -6,10 +6,10 @@ tasks: - id: task-01 title: Add `mapping_diff_report` console script entrypoint to `pyproject.toml` under `[project.scripts]` pointing to `.cli.mapping_diff_report:main` - status: doing + status: done started_at: '2026-02-23T18:21:21Z' - finished_at: null - commit: '' + finished_at: '2026-02-23T18:21:38Z' + commit: 1257ec8041e7df00837f982b536348187dc3cea7 notes: [] - id: task-02 title: Create `src//cli/mapping_diff_report.py` with argument parser From 16f4ff713f8a169eab9781a2713393c842b5ac3b Mon Sep 17 00:00:00 2001 From: Codex Agent Date: Mon, 23 Feb 2026 18:29:56 +0000 Subject: [PATCH 03/16] Add mapping_diff_report console script entrypoint --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 42f7e260..986b9a64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,9 @@ dev = [ Homepage = "https://github.com/stranske/Template" Repository = "https://github.com/stranske/Template" +[project.scripts] +mapping_diff_report = "counter_risk.cli.mapping_diff_report:main" + [tool.setuptools.packages.find] where = ["src"] From a050acec1143a3c29b043fbcec32904bfee7e52f Mon Sep 17 00:00:00 2001 From: Codex Agent Date: Mon, 23 Feb 2026 18:39:53 +0000 Subject: [PATCH 04/16] Create mapping_diff_report CLI module and package CLI layout --- src/counter_risk/{cli.py => cli/__init__.py} | 4 --- src/counter_risk/cli/__main__.py | 8 ++++++ src/counter_risk/cli/mapping_diff_report.py | 26 ++++++++++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) rename src/counter_risk/{cli.py => cli/__init__.py} (97%) create mode 100644 src/counter_risk/cli/__main__.py create mode 100644 src/counter_risk/cli/mapping_diff_report.py diff --git a/src/counter_risk/cli.py b/src/counter_risk/cli/__init__.py similarity index 97% rename from src/counter_risk/cli.py rename to src/counter_risk/cli/__init__.py index f4026338..1b3d3ae0 100644 --- a/src/counter_risk/cli.py +++ b/src/counter_risk/cli/__init__.py @@ -61,7 +61,3 @@ def main(argv: list[str] | None = None) -> int: return 0 command_handler = cast(Callable[[argparse.Namespace], int], handler) return command_handler(args) - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/src/counter_risk/cli/__main__.py b/src/counter_risk/cli/__main__.py new file mode 100644 index 00000000..70e82f1f --- /dev/null +++ b/src/counter_risk/cli/__main__.py @@ -0,0 +1,8 @@ +"""`python -m counter_risk.cli` entrypoint.""" + +from __future__ import annotations + +from counter_risk.cli import main + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/counter_risk/cli/mapping_diff_report.py b/src/counter_risk/cli/mapping_diff_report.py new file mode 100644 index 00000000..5ecb6c40 --- /dev/null +++ b/src/counter_risk/cli/mapping_diff_report.py @@ -0,0 +1,26 @@ +"""CLI entrypoint for mapping diff report generation.""" + +from __future__ import annotations + +import argparse + + +def build_parser() -> argparse.ArgumentParser: + """Build argument parser for mapping_diff_report.""" + + return argparse.ArgumentParser( + prog="mapping_diff_report", + description="Generate a deterministic mapping diff report.", + ) + + +def main(argv: list[str] | None = None) -> int: + """Run the mapping diff report CLI.""" + + parser = build_parser() + parser.parse_args(argv) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From 105310b75acf7334aa21c5ed1b641bd8f5dbecf4 Mon Sep 17 00:00:00 2001 From: Codex Agent Date: Mon, 23 Feb 2026 18:46:09 +0000 Subject: [PATCH 05/16] Wire mapping_diff_report CLI to deterministic report generator --- src/counter_risk/cli/mapping_diff_report.py | 40 +++++++- src/counter_risk/normalize.py | 95 +++++++++++++---- src/counter_risk/reports/__init__.py | 1 + src/counter_risk/reports/mapping_diff.py | 55 ++++++++++ tests/test_mapping_diff_report_cli.py | 107 ++++++++++++++++++++ 5 files changed, 276 insertions(+), 22 deletions(-) create mode 100644 src/counter_risk/reports/__init__.py create mode 100644 src/counter_risk/reports/mapping_diff.py create mode 100644 tests/test_mapping_diff_report_cli.py diff --git a/src/counter_risk/cli/mapping_diff_report.py b/src/counter_risk/cli/mapping_diff_report.py index 5ecb6c40..51535620 100644 --- a/src/counter_risk/cli/mapping_diff_report.py +++ b/src/counter_risk/cli/mapping_diff_report.py @@ -3,22 +3,58 @@ from __future__ import annotations import argparse +import sys +from pathlib import Path + +from counter_risk.reports.mapping_diff import generate_mapping_diff_report def build_parser() -> argparse.ArgumentParser: """Build argument parser for mapping_diff_report.""" - return argparse.ArgumentParser( + parser = argparse.ArgumentParser( prog="mapping_diff_report", description="Generate a deterministic mapping diff report.", ) + parser.add_argument( + "--registry", + type=Path, + default=Path("config/name_registry.yml"), + help="Path to registry YAML file.", + ) + parser.add_argument( + "--normalization-name", + action="append", + default=[], + help="Raw input name observed during normalization. Can be provided multiple times.", + ) + parser.add_argument( + "--reconciliation-name", + action="append", + default=[], + help="Raw input name observed during reconciliation. Can be provided multiple times.", + ) + return parser def main(argv: list[str] | None = None) -> int: """Run the mapping diff report CLI.""" parser = build_parser() - parser.parse_args(argv) + args = parser.parse_args(argv) + + input_sources = { + "normalization": list(args.normalization_name), + "reconciliation": list(args.reconciliation_name), + } + try: + report = generate_mapping_diff_report(args.registry, input_sources) + except ValueError as exc: + error_line = " ".join(str(exc).splitlines()) + print(error_line, file=sys.stderr) + return 1 + + sys.stdout.write(report) return 0 diff --git a/src/counter_risk/normalize.py b/src/counter_risk/normalize.py index c2f12d95..a08996a7 100644 --- a/src/counter_risk/normalize.py +++ b/src/counter_risk/normalize.py @@ -6,6 +6,42 @@ from __future__ import annotations +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path +from typing import Literal + +from counter_risk.name_registry import NameRegistryConfig, load_name_registry + + +@dataclass(frozen=True) +class NameResolution: + """A resolved counterparty name and mapping origin.""" + + raw_name: str + canonical_name: str + source: Literal["registry", "fallback", "unmapped"] + + +_COUNTERPARTY_FALLBACK_MAPPINGS = { + "Citigroup": "Citibank", + "Bank of America, NA": "Bank of America", + "Bank of America NA": "Bank of America", + "Goldman Sachs Int'l": "Goldman Sachs", + "Societe Generale": "Soc Gen", + "Barclays Bank PLC": "Barclays", +} + +_CLEARING_HOUSE_FALLBACK_MAPPINGS = { + "CME Clearing House": "CME", + "ICE Clear U.S.": "ICE", + "ICE Clear US": "ICE", + "ICE Clear Europe": "ICE Euro", + "EUREX Clearing": "EUREX", + "Japan Securities Clearing Corporation": "Japan SCC", + "Korea Exchange (in-house)": "Korea Exchange", +} + def _normalize_whitespace(name: str) -> str: """Trim leading/trailing whitespace and collapse internal runs of whitespace.""" @@ -13,32 +49,51 @@ def _normalize_whitespace(name: str) -> str: return " ".join(name.split()) +@lru_cache(maxsize=8) +def _load_alias_lookup(registry_path: str) -> dict[str, str]: + try: + registry = load_name_registry(Path(registry_path)) + except ValueError: + return {} + return _build_alias_lookup(registry) + + +def _build_alias_lookup(registry: NameRegistryConfig) -> dict[str, str]: + lookup: dict[str, str] = {} + for entry in registry.entries: + for alias in entry.aliases: + lookup[_normalize_whitespace(alias).casefold()] = entry.display_name + return lookup + + +def resolve_counterparty( + name: str, + *, + registry_path: str | Path = Path("config/name_registry.yml"), +) -> NameResolution: + """Resolve counterparty name with registry-first semantics.""" + + normalized = _normalize_whitespace(name) + alias_lookup = _load_alias_lookup(str(Path(registry_path))) + registry_match = alias_lookup.get(normalized.casefold()) + if registry_match is not None: + return NameResolution(raw_name=name, canonical_name=registry_match, source="registry") + + fallback_match = _COUNTERPARTY_FALLBACK_MAPPINGS.get(normalized) + if fallback_match is not None: + return NameResolution(raw_name=name, canonical_name=fallback_match, source="fallback") + + return NameResolution(raw_name=name, canonical_name=normalized, source="unmapped") + + def normalize_counterparty(name: str) -> str: """Normalize a counterparty name to the canonical historical workbook label.""" - mappings = { - "Citigroup": "Citibank", - "Bank of America, NA": "Bank of America", - "Bank of America NA": "Bank of America", - "Goldman Sachs Int'l": "Goldman Sachs", - "Societe Generale": "Soc Gen", - "Barclays Bank PLC": "Barclays", - } - normalized = _normalize_whitespace(name) - return mappings.get(normalized, normalized) + return resolve_counterparty(name).canonical_name def normalize_clearing_house(name: str) -> str: """Normalize a clearing house name to the canonical historical workbook label.""" - mappings = { - "CME Clearing House": "CME", - "ICE Clear U.S.": "ICE", - "ICE Clear US": "ICE", - "ICE Clear Europe": "ICE Euro", - "EUREX Clearing": "EUREX", - "Japan Securities Clearing Corporation": "Japan SCC", - "Korea Exchange (in-house)": "Korea Exchange", - } normalized = _normalize_whitespace(name) - return mappings.get(normalized, normalized) + return _CLEARING_HOUSE_FALLBACK_MAPPINGS.get(normalized, normalized) diff --git a/src/counter_risk/reports/__init__.py b/src/counter_risk/reports/__init__.py new file mode 100644 index 00000000..b9e195b7 --- /dev/null +++ b/src/counter_risk/reports/__init__.py @@ -0,0 +1 @@ +"""Report generation package.""" diff --git a/src/counter_risk/reports/mapping_diff.py b/src/counter_risk/reports/mapping_diff.py new file mode 100644 index 00000000..bb608818 --- /dev/null +++ b/src/counter_risk/reports/mapping_diff.py @@ -0,0 +1,55 @@ +"""Deterministic mapping diff report generator.""" + +from __future__ import annotations + +from collections.abc import Iterable, Mapping +from pathlib import Path + +from counter_risk.name_registry import load_name_registry +from counter_risk.normalize import resolve_counterparty + + +def _title_case_suggestion(raw_name: str) -> str: + return raw_name.title() + + +def _iter_input_names(input_sources: Mapping[str, Iterable[str]]) -> Iterable[str]: + for source_name in sorted(input_sources): + for raw_name in input_sources[source_name]: + yield raw_name + + +def generate_mapping_diff_report( + registry_path: str | Path, + input_sources: Mapping[str, Iterable[str]], +) -> str: + """Generate a deterministic mapping diff report.""" + + # Load once so missing/unreadable/invalid registry is treated as fatal for report generation. + load_name_registry(registry_path) + + unmapped_names: dict[str, None] = {} + fallback_mapped: dict[str, str] = {} + + for raw_name in _iter_input_names(input_sources): + result = resolve_counterparty(raw_name, registry_path=registry_path) + if result.source == "fallback": + fallback_mapped.setdefault(raw_name, result.canonical_name) + continue + if result.source == "unmapped": + unmapped_names.setdefault(raw_name, None) + + lines: list[str] = ["UNMAPPED"] + lines.extend(sorted(unmapped_names, key=str.casefold)) + lines.append("") + + lines.append("FALLBACK_MAPPED") + for raw_name in sorted(fallback_mapped, key=str.casefold): + lines.append(f"{raw_name} -> {fallback_mapped[raw_name]}") + lines.append("") + + lines.append("SUGGESTIONS") + for raw_name in sorted(unmapped_names, key=str.casefold): + lines.append(f"{raw_name} -> {_title_case_suggestion(raw_name)}") + + return "\n".join(lines) + "\n" diff --git a/tests/test_mapping_diff_report_cli.py b/tests/test_mapping_diff_report_cli.py new file mode 100644 index 00000000..c6cd7f45 --- /dev/null +++ b/tests/test_mapping_diff_report_cli.py @@ -0,0 +1,107 @@ +"""Tests for mapping_diff_report CLI behavior.""" + +from __future__ import annotations + +import os +import stat +import subprocess +import sys +from pathlib import Path + + +def _cli_cmd() -> list[str]: + return [sys.executable, "-m", "counter_risk.cli.mapping_diff_report"] + + +def _cli_env() -> dict[str, str]: + env = os.environ.copy() + src_path = str(Path("src").resolve()) + env["PYTHONPATH"] = ( + src_path if "PYTHONPATH" not in env else f"{src_path}{os.pathsep}{env['PYTHONPATH']}" + ) + return env + + +def test_mapping_diff_report_help_exits_zero() -> None: + result = subprocess.run( + [*_cli_cmd(), "--help"], + check=False, + capture_output=True, + text=True, + env=_cli_env(), + ) + assert result.returncode == 0 + assert "mapping_diff_report" in result.stdout + + +def test_mapping_diff_report_missing_registry_exits_nonzero(tmp_path: Path) -> None: + missing_registry = tmp_path / "missing_registry.yml" + result = subprocess.run( + [*_cli_cmd(), "--registry", str(missing_registry)], + check=False, + capture_output=True, + text=True, + env=_cli_env(), + ) + assert result.returncode != 0 + assert str(missing_registry) in result.stderr + assert len(result.stderr.strip().splitlines()) == 1 + + +def test_mapping_diff_report_unreadable_registry_exits_nonzero(tmp_path: Path) -> None: + registry_path = tmp_path / "name_registry.yml" + registry_path.write_text("schema_version: 1\nentries: []\n", encoding="utf-8") + registry_path.chmod(0) + try: + result = subprocess.run( + [*_cli_cmd(), "--registry", str(registry_path)], + check=False, + capture_output=True, + text=True, + env=_cli_env(), + ) + finally: + registry_path.chmod(stat.S_IRUSR | stat.S_IWUSR) + + assert result.returncode != 0 + assert str(registry_path) in result.stderr + assert len(result.stderr.strip().splitlines()) == 1 + + +def test_mapping_diff_report_deterministic_sections(tmp_path: Path) -> None: + registry_path = tmp_path / "name_registry.yml" + registry_path.write_text( + "\n".join( + [ + "schema_version: 1", + "entries:", + " - canonical_key: bank_of_america", + " display_name: Bank of America", + " aliases:", + " - Bank of America", + ] + ) + + "\n", + encoding="utf-8", + ) + + args = [ + *_cli_cmd(), + "--registry", + str(registry_path), + "--normalization-name", + "Societe Generale", + "--normalization-name", + "Unknown House", + "--reconciliation-name", + "Unknown House", + ] + first = subprocess.run(args, check=False, capture_output=True, text=True, env=_cli_env()) + second = subprocess.run(args, check=False, capture_output=True, text=True, env=_cli_env()) + + assert first.returncode == 0 + assert second.returncode == 0 + assert first.stdout == second.stdout + assert "UNMAPPED\nUnknown House\n" in first.stdout + assert "FALLBACK_MAPPED\nSociete Generale -> Soc Gen\n" in first.stdout + assert "SUGGESTIONS\nUnknown House -> Unknown House\n" in first.stdout From a5037e8e77f7544fdd41b5518e89622800c77a21 Mon Sep 17 00:00:00 2001 From: Codex Agent Date: Mon, 23 Feb 2026 18:59:12 +0000 Subject: [PATCH 06/16] Implement mapping diff scanning for normalization and reconciliation inputs --- src/counter_risk/reports/mapping_diff.py | 96 ++++++++++++++++++++++-- tests/test_mapping_diff_report.py | 84 +++++++++++++++++++++ 2 files changed, 175 insertions(+), 5 deletions(-) create mode 100644 tests/test_mapping_diff_report.py diff --git a/src/counter_risk/reports/mapping_diff.py b/src/counter_risk/reports/mapping_diff.py index bb608818..28582a2c 100644 --- a/src/counter_risk/reports/mapping_diff.py +++ b/src/counter_risk/reports/mapping_diff.py @@ -2,26 +2,112 @@ from __future__ import annotations -from collections.abc import Iterable, Mapping +from collections.abc import Iterable, Iterator, Mapping from pathlib import Path +from typing import Any from counter_risk.name_registry import load_name_registry from counter_risk.normalize import resolve_counterparty +_NORMALIZATION_NAME_KEYS = { + "counterparty", + "counterparty_name", + "name", + "raw_counterparty", + "raw_name", +} +_RECONCILIATION_NAME_KEYS = { + "counterparties_in_data", + "raw_counterparty_labels", +} + def _title_case_suggestion(raw_name: str) -> str: return raw_name.title() -def _iter_input_names(input_sources: Mapping[str, Iterable[str]]) -> Iterable[str]: +def _iter_string_values(value: Any) -> Iterator[str]: + if isinstance(value, str): + normalized = value.strip() + if normalized: + yield normalized + return + if isinstance(value, Mapping): + return + if isinstance(value, Iterable): + for item in value: + yield from _iter_string_values(item) + + +def _iter_names_from_payload( + value: Any, + *, + name_keys: set[str], + collect_strings: bool = False, +) -> Iterator[str]: + if isinstance(value, str): + if collect_strings: + normalized = value.strip() + if normalized: + yield normalized + return + + if isinstance(value, Mapping): + for raw_key, raw_child in value.items(): + key = str(raw_key).strip().casefold() + child_collect = collect_strings or key in name_keys + yield from _iter_names_from_payload( + raw_child, + name_keys=name_keys, + collect_strings=child_collect, + ) + return + + if isinstance(value, Iterable): + for child in value: + yield from _iter_names_from_payload( + child, + name_keys=name_keys, + collect_strings=collect_strings, + ) + + +def _iter_flat_string_sequence(payload: Any) -> Iterator[str]: + if isinstance(payload, str) or isinstance(payload, Mapping): + return + if not isinstance(payload, Iterable): + return + + values = list(payload) + if not values or not all(isinstance(value, str) for value in values): + return + + for value in values: + normalized = value.strip() + if normalized: + yield normalized + + +def _iter_input_names(input_sources: Mapping[str, Any]) -> Iterable[str]: for source_name in sorted(input_sources): - for raw_name in input_sources[source_name]: - yield raw_name + payload = input_sources[source_name] + source_key = str(source_name).strip().casefold() + if source_key == "normalization": + yield from _iter_flat_string_sequence(payload) + yield from _iter_names_from_payload(payload, name_keys=_NORMALIZATION_NAME_KEYS) + continue + if source_key == "reconciliation": + yield from _iter_flat_string_sequence(payload) + yield from _iter_names_from_payload(payload, name_keys=_RECONCILIATION_NAME_KEYS) + continue + + # Backward-compatible fallback for legacy callers that pass a flat list of names. + yield from _iter_string_values(payload) def generate_mapping_diff_report( registry_path: str | Path, - input_sources: Mapping[str, Iterable[str]], + input_sources: Mapping[str, Any], ) -> str: """Generate a deterministic mapping diff report.""" diff --git a/tests/test_mapping_diff_report.py b/tests/test_mapping_diff_report.py new file mode 100644 index 00000000..62d5b778 --- /dev/null +++ b/tests/test_mapping_diff_report.py @@ -0,0 +1,84 @@ +"""Unit tests for mapping diff report input scanning.""" + +from __future__ import annotations + +from pathlib import Path + +from counter_risk.reports.mapping_diff import generate_mapping_diff_report + + +def _write_registry(path: Path) -> None: + path.write_text( + "\n".join( + [ + "schema_version: 1", + "entries:", + " - canonical_key: bank_of_america", + " display_name: Bank of America", + " aliases:", + " - Bank of America", + ] + ) + + "\n", + encoding="utf-8", + ) + + +def test_generate_mapping_diff_report_scans_normalization_and_reconciliation_payloads( + tmp_path: Path, +) -> None: + registry_path = tmp_path / "name_registry.yml" + _write_registry(registry_path) + + report = generate_mapping_diff_report( + registry_path, + { + "normalization": [ + {"counterparty": "Societe Generale", "notional": 1.0}, + {"counterparty": "Unknown House"}, + ], + "reconciliation": { + "by_sheet": { + "Total": { + "counterparties_in_data": [ + "Bank of America, NA", + "Unknown House", + "Citigroup", + ], + "normalized_counterparties_in_data": [ + "Bank of America", + "Unknown House", + "Citibank", + ], + } + } + }, + }, + ) + + assert "UNMAPPED\nUnknown House\n" in report + assert "FALLBACK_MAPPED\n" in report + assert "Bank of America, NA -> Bank of America\n" in report + assert "Citigroup -> Citibank\n" in report + assert "Societe Generale -> Soc Gen\n" in report + assert "SUGGESTIONS\nUnknown House -> Unknown House\n" in report + + +def test_generate_mapping_diff_report_ignores_non_name_string_fields(tmp_path: Path) -> None: + registry_path = tmp_path / "name_registry.yml" + _write_registry(registry_path) + + report = generate_mapping_diff_report( + registry_path, + { + "normalization": { + "metadata": {"run_id": "run-123"}, + "rows": [{"counterparty": "Societe Generale", "segment": "swaps"}], + }, + "reconciliation": {"warnings": ["raw='Societe Generale'"]}, + }, + ) + + assert "run-123" not in report + assert "raw='Societe Generale'" not in report + assert "Societe Generale -> Soc Gen\n" in report From b56b3fa22b5801cb078b15697dc021919e092168 Mon Sep 17 00:00:00 2001 From: Codex Agent Date: Tue, 24 Feb 2026 04:17:03 +0000 Subject: [PATCH 07/16] Preserve raw unmapped names in mapping diff report --- src/counter_risk/reports/mapping_diff.py | 19 ++++++++++--------- tests/test_mapping_diff_report.py | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/counter_risk/reports/mapping_diff.py b/src/counter_risk/reports/mapping_diff.py index 28582a2c..33f83616 100644 --- a/src/counter_risk/reports/mapping_diff.py +++ b/src/counter_risk/reports/mapping_diff.py @@ -26,11 +26,14 @@ def _title_case_suggestion(raw_name: str) -> str: return raw_name.title() +def _is_nonblank(value: str) -> bool: + return bool(value.strip()) + + def _iter_string_values(value: Any) -> Iterator[str]: if isinstance(value, str): - normalized = value.strip() - if normalized: - yield normalized + if _is_nonblank(value): + yield value return if isinstance(value, Mapping): return @@ -47,9 +50,8 @@ def _iter_names_from_payload( ) -> Iterator[str]: if isinstance(value, str): if collect_strings: - normalized = value.strip() - if normalized: - yield normalized + if _is_nonblank(value): + yield value return if isinstance(value, Mapping): @@ -83,9 +85,8 @@ def _iter_flat_string_sequence(payload: Any) -> Iterator[str]: return for value in values: - normalized = value.strip() - if normalized: - yield normalized + if _is_nonblank(value): + yield value def _iter_input_names(input_sources: Mapping[str, Any]) -> Iterable[str]: diff --git a/tests/test_mapping_diff_report.py b/tests/test_mapping_diff_report.py index 62d5b778..c56f7776 100644 --- a/tests/test_mapping_diff_report.py +++ b/tests/test_mapping_diff_report.py @@ -82,3 +82,21 @@ def test_generate_mapping_diff_report_ignores_non_name_string_fields(tmp_path: P assert "run-123" not in report assert "raw='Societe Generale'" not in report assert "Societe Generale -> Soc Gen\n" in report + + +def test_generate_mapping_diff_report_preserves_raw_names(tmp_path: Path) -> None: + registry_path = tmp_path / "name_registry.yml" + _write_registry(registry_path) + + report = generate_mapping_diff_report( + registry_path, + { + "normalization": [ + {"counterparty": " Unknown House "}, + {"counterparty": " "}, + ], + }, + ) + + assert "UNMAPPED\n Unknown House \n" in report + assert "Unknown House\n" not in report From 37898c201a4280bc6a2e7fc17de2d3151b1a1540 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 24 Feb 2026 05:29:11 +0000 Subject: [PATCH 08/16] chore(codex-keepalive): apply updates (PR #228) --- src/counter_risk/reports/mapping_diff.py | 12 ++++++--- tests/test_mapping_diff_report.py | 34 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/counter_risk/reports/mapping_diff.py b/src/counter_risk/reports/mapping_diff.py index 33f83616..f911cba7 100644 --- a/src/counter_risk/reports/mapping_diff.py +++ b/src/counter_risk/reports/mapping_diff.py @@ -26,6 +26,12 @@ def _title_case_suggestion(raw_name: str) -> str: return raw_name.title() +def _sorted_raw_names(values: Iterable[str]) -> list[str]: + """Sort names deterministically with case-insensitive primary ordering.""" + + return sorted(values, key=lambda raw_name: (raw_name.casefold(), raw_name)) + + def _is_nonblank(value: str) -> bool: return bool(value.strip()) @@ -127,16 +133,16 @@ def generate_mapping_diff_report( unmapped_names.setdefault(raw_name, None) lines: list[str] = ["UNMAPPED"] - lines.extend(sorted(unmapped_names, key=str.casefold)) + lines.extend(_sorted_raw_names(unmapped_names)) lines.append("") lines.append("FALLBACK_MAPPED") - for raw_name in sorted(fallback_mapped, key=str.casefold): + for raw_name in _sorted_raw_names(fallback_mapped): lines.append(f"{raw_name} -> {fallback_mapped[raw_name]}") lines.append("") lines.append("SUGGESTIONS") - for raw_name in sorted(unmapped_names, key=str.casefold): + for raw_name in _sorted_raw_names(unmapped_names): lines.append(f"{raw_name} -> {_title_case_suggestion(raw_name)}") return "\n".join(lines) + "\n" diff --git a/tests/test_mapping_diff_report.py b/tests/test_mapping_diff_report.py index c56f7776..4fc524bc 100644 --- a/tests/test_mapping_diff_report.py +++ b/tests/test_mapping_diff_report.py @@ -100,3 +100,37 @@ def test_generate_mapping_diff_report_preserves_raw_names(tmp_path: Path) -> Non assert "UNMAPPED\n Unknown House \n" in report assert "Unknown House\n" not in report + + +def test_generate_mapping_diff_report_fallback_section_is_deterministic(tmp_path: Path) -> None: + registry_path = tmp_path / "name_registry.yml" + _write_registry(registry_path) + + report = generate_mapping_diff_report( + registry_path, + { + "normalization": [ + {"counterparty": "Citigroup"}, + {"counterparty": "Bank of America, NA"}, + {"counterparty": "Societe Generale"}, + ], + "reconciliation": { + "counterparties_in_data": [ + "Societe Generale", + "Citigroup", + "Bank of America, NA", + ] + }, + }, + ) + + expected_section = "\n".join( + [ + "FALLBACK_MAPPED", + "Bank of America, NA -> Bank of America", + "Citigroup -> Citibank", + "Societe Generale -> Soc Gen", + "", + ] + ) + assert expected_section in report From 91aedc15eaa07d11ed19dc000a87a682d6faa835 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 24 Feb 2026 05:40:48 +0000 Subject: [PATCH 09/16] chore(codex-keepalive): apply updates (PR #228) --- tests/fixtures/name_registry_after.yml | 7 ++ tests/fixtures/name_registry_before.yml | 6 ++ tests/test_normalization_registry_first.py | 82 ++++++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 tests/fixtures/name_registry_after.yml create mode 100644 tests/fixtures/name_registry_before.yml create mode 100644 tests/test_normalization_registry_first.py diff --git a/tests/fixtures/name_registry_after.yml b/tests/fixtures/name_registry_after.yml new file mode 100644 index 00000000..c13c49c9 --- /dev/null +++ b/tests/fixtures/name_registry_after.yml @@ -0,0 +1,7 @@ +schema_version: 1 +entries: + - canonical_key: soc_gen_inc + display_name: Soc Gen Inc + aliases: + - Soc Gen Inc + - Societe Generale diff --git a/tests/fixtures/name_registry_before.yml b/tests/fixtures/name_registry_before.yml new file mode 100644 index 00000000..fee6e526 --- /dev/null +++ b/tests/fixtures/name_registry_before.yml @@ -0,0 +1,6 @@ +schema_version: 1 +entries: + - canonical_key: soc_gen_inc + display_name: Soc Gen Inc + aliases: + - Soc Gen Inc diff --git a/tests/test_normalization_registry_first.py b/tests/test_normalization_registry_first.py new file mode 100644 index 00000000..2a3a9c8c --- /dev/null +++ b/tests/test_normalization_registry_first.py @@ -0,0 +1,82 @@ +"""Integration tests for registry-first normalization and mapping diff output.""" + +from __future__ import annotations + +import logging +import shutil +from pathlib import Path + +import pytest + +from counter_risk.pipeline.run import reconcile_series_coverage +from counter_risk.reports.mapping_diff import generate_mapping_diff_report + + +def _fixture_path(name: str) -> Path: + return Path("tests/fixtures") / name + + +def _input_sources() -> dict[str, object]: + return { + "normalization": [{"counterparty": "Societe Generale"}], + "reconciliation": {"counterparties_in_data": ["Societe Generale"]}, + } + + +def test_mapping_diff_report_before_registry_alias_uses_fallback_section() -> None: + report = generate_mapping_diff_report( + _fixture_path("name_registry_before.yml"), _input_sources() + ) + + assert "FALLBACK_MAPPED\nSociete Generale -> Soc Gen\n" in report + + +def test_mapping_diff_report_after_registry_alias_removes_fallback_and_unmapped_entries() -> None: + report = generate_mapping_diff_report( + _fixture_path("name_registry_after.yml"), _input_sources() + ) + + assert "Societe Generale -> Soc Gen\n" not in report + assert "UNMAPPED\nSociete Generale\n" not in report + assert "SUGGESTIONS\nSociete Generale -> Societe Generale\n" not in report + + +def test_mapping_diff_report_changes_between_before_and_after_registry_states() -> None: + before_report = generate_mapping_diff_report( + _fixture_path("name_registry_before.yml"), + _input_sources(), + ) + after_report = generate_mapping_diff_report( + _fixture_path("name_registry_after.yml"), + _input_sources(), + ) + + assert before_report != after_report + assert "Societe Generale -> Soc Gen\n" in before_report + assert "Societe Generale -> Soc Gen\n" not in after_report + + +def test_reconciliation_with_after_registry_has_no_societe_generale_warning( + caplog: pytest.LogCaptureFixture, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + config_dir = tmp_path / "config" + config_dir.mkdir(parents=True) + shutil.copyfile( + _fixture_path("name_registry_after.yml"), + config_dir / "name_registry.yml", + ) + monkeypatch.chdir(tmp_path) + caplog.set_level(logging.WARNING) + + result = reconcile_series_coverage( + parsed_data_by_sheet={ + "Total": {"totals": [{"counterparty": "Societe Generale"}], "futures": []} + }, + historical_series_headers_by_sheet={"Total": ("Soc Gen Inc", "Legacy Counterparty")}, + ) + + assert result["warnings"] + assert not any("Societe Generale" in warning for warning in result["warnings"]) + assert all("Societe Generale" not in record.getMessage() for record in caplog.records) From 19d659b1a05829eec03aec2186aecb61571e57f7 Mon Sep 17 00:00:00 2001 From: Codex Agent Date: Tue, 24 Feb 2026 06:08:32 +0000 Subject: [PATCH 10/16] fix: resolve CI failures --- release.spec | 2 +- src/counter_risk/reports/mapping_diff.py | 7 +++---- tests/pipeline/test_run_pipeline.py | 10 +++++----- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/release.spec b/release.spec index d9e12d3b..c654a3cf 100644 --- a/release.spec +++ b/release.spec @@ -23,7 +23,7 @@ if fixture_template.exists(): a = Analysis( - [str(project_root / "src" / "counter_risk" / "cli.py")], + [str(project_root / "src" / "counter_risk" / "cli" / "__main__.py")], pathex=[str(project_root / "src")], binaries=[], datas=datas, diff --git a/src/counter_risk/reports/mapping_diff.py b/src/counter_risk/reports/mapping_diff.py index f911cba7..b75a5e13 100644 --- a/src/counter_risk/reports/mapping_diff.py +++ b/src/counter_risk/reports/mapping_diff.py @@ -55,9 +55,8 @@ def _iter_names_from_payload( collect_strings: bool = False, ) -> Iterator[str]: if isinstance(value, str): - if collect_strings: - if _is_nonblank(value): - yield value + if collect_strings and _is_nonblank(value): + yield value return if isinstance(value, Mapping): @@ -81,7 +80,7 @@ def _iter_names_from_payload( def _iter_flat_string_sequence(payload: Any) -> Iterator[str]: - if isinstance(payload, str) or isinstance(payload, Mapping): + if isinstance(payload, (str, Mapping)): return if not isinstance(payload, Iterable): return diff --git a/tests/pipeline/test_run_pipeline.py b/tests/pipeline/test_run_pipeline.py index cb8832a7..f86a228c 100644 --- a/tests/pipeline/test_run_pipeline.py +++ b/tests/pipeline/test_run_pipeline.py @@ -1253,7 +1253,7 @@ def test_create_static_distribution_warns_on_non_windows( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: """On non-Windows platforms a clear warning is emitted and no paths are returned.""" - monkeypatch.setattr(run_module.platform, "system", lambda: "Linux") + monkeypatch.setattr("counter_risk.pipeline.run.platform.system", lambda: "Linux") source_pptx = tmp_path / "deck.pptx" source_pptx.write_bytes(b"fake-pptx") @@ -1279,7 +1279,7 @@ def test_create_static_distribution_warns_when_win32com_missing( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: """When win32com is absent on a simulated Windows host a warning is emitted.""" - monkeypatch.setattr(run_module.platform, "system", lambda: "Windows") + monkeypatch.setattr("counter_risk.pipeline.run.platform.system", lambda: "Windows") # Ensure win32com.client cannot be imported. monkeypatch.setitem(sys.modules, "win32com", None) monkeypatch.setitem(sys.modules, "win32com.client", None) @@ -1332,7 +1332,7 @@ def test_run_pipeline_manifest_includes_distribution_static_warning( ) # Force non-Windows so the fallback path is exercised. - monkeypatch.setattr(run_module.platform, "system", lambda: "Linux") + monkeypatch.setattr("counter_risk.pipeline.run.platform.system", lambda: "Linux") run_dir = run_pipeline(config_path) @@ -1395,10 +1395,10 @@ def __init__(self) -> None: self._slides = [_FakeSlide()] self.Count = 1 - def __iter__(self): # type: ignore[no-untyped-def] + def __iter__(self) -> Any: return iter(self._slides) - def __getitem__(self, idx: int): # type: ignore[no-untyped-def] + def __getitem__(self, idx: int) -> Any: return self._slides[idx - 1] class _Presentation: From 875557117eda1ac94a96781557578eb04f680ce1 Mon Sep 17 00:00:00 2001 From: Codex Agent Date: Tue, 24 Feb 2026 07:45:40 +0000 Subject: [PATCH 11/16] fix: resolve CI failures --- src/counter_risk/normalize.py | 2 +- tests/test_release_spec.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/counter_risk/normalize.py b/src/counter_risk/normalize.py index a08996a7..4cdd3cfc 100644 --- a/src/counter_risk/normalize.py +++ b/src/counter_risk/normalize.py @@ -74,7 +74,7 @@ def resolve_counterparty( """Resolve counterparty name with registry-first semantics.""" normalized = _normalize_whitespace(name) - alias_lookup = _load_alias_lookup(str(Path(registry_path))) + alias_lookup = _load_alias_lookup(str(Path(registry_path).resolve())) registry_match = alias_lookup.get(normalized.casefold()) if registry_match is not None: return NameResolution(raw_name=name, canonical_name=registry_match, source="registry") diff --git a/tests/test_release_spec.py b/tests/test_release_spec.py index 46bce21c..b8f3e46c 100644 --- a/tests/test_release_spec.py +++ b/tests/test_release_spec.py @@ -54,7 +54,7 @@ def _collect(*args: object, **kwargs: object) -> str: analysis_scripts = captures["analysis_args"][0] assert len(analysis_scripts) == 1 - assert analysis_scripts[0].endswith("src/counter_risk/cli.py") + assert analysis_scripts[0].endswith("src/counter_risk/cli/__main__.py") runtime_hooks = captures["analysis_kwargs"]["runtime_hooks"] assert len(runtime_hooks) == 1 From 7c055c386f45a5fada891a49bf59c176ffb01407 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:15:50 +0000 Subject: [PATCH 12/16] chore(codex-keepalive): apply updates (PR #228) --- README.md | 6 ++++++ tests/test_mapping_diff_report.py | 34 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/README.md b/README.md index 163a2bce..e60a7b63 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,12 @@ A programmatic replacement for the MOSERS spreadsheet workflow used to evaluate - **Keepalive troubleshooting:** [docs/KEEPALIVE_TROUBLESHOOTING.md](docs/KEEPALIVE_TROUBLESHOOTING.md) - **Consumer setup checklist:** [docs/SETUP_CHECKLIST.md](docs/SETUP_CHECKLIST.md) +## Name Registry Workflow + +1. Edit `config/name_registry.yml`. +2. Run `mapping_diff_report`. +3. Interpret `UNMAPPED`, `FALLBACK_MAPPED`, and `SUGGESTIONS` sections. + ## Repository automation (high level) This repo is integrated with the central [stranske/Workflows](https://github.com/stranske/Workflows) library. diff --git a/tests/test_mapping_diff_report.py b/tests/test_mapping_diff_report.py index 4fc524bc..3e8f4025 100644 --- a/tests/test_mapping_diff_report.py +++ b/tests/test_mapping_diff_report.py @@ -134,3 +134,37 @@ def test_generate_mapping_diff_report_fallback_section_is_deterministic(tmp_path ] ) assert expected_section in report + + +def test_generate_mapping_diff_report_suggestions_are_deterministic_title_case( + tmp_path: Path, +) -> None: + registry_path = tmp_path / "name_registry.yml" + _write_registry(registry_path) + + report = generate_mapping_diff_report( + registry_path, + { + "normalization": [ + {"counterparty": "aaa holdings"}, + {"counterparty": "zeta llc"}, + {"counterparty": "aaa holdings"}, + ], + "reconciliation": { + "counterparties_in_data": [ + "zeta llc", + "aaa holdings", + ] + }, + }, + ) + + expected_section = "\n".join( + [ + "SUGGESTIONS", + "aaa holdings -> Aaa Holdings", + "zeta llc -> Zeta Llc", + "", + ] + ) + assert expected_section in report From 39c6ab25d61ddc5d2d4cb0145c8a5647308d535e Mon Sep 17 00:00:00 2001 From: Codex Agent Date: Tue, 24 Feb 2026 14:27:26 +0000 Subject: [PATCH 13/16] test: tighten mapping diff CLI and section format assertions --- tests/test_mapping_diff_report.py | 29 +++++++++++++++++++++++++++ tests/test_mapping_diff_report_cli.py | 14 +++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tests/test_mapping_diff_report.py b/tests/test_mapping_diff_report.py index 3e8f4025..2a906765 100644 --- a/tests/test_mapping_diff_report.py +++ b/tests/test_mapping_diff_report.py @@ -168,3 +168,32 @@ def test_generate_mapping_diff_report_suggestions_are_deterministic_title_case( ] ) assert expected_section in report + + +def test_generate_mapping_diff_report_sections_use_required_line_formats(tmp_path: Path) -> None: + registry_path = tmp_path / "name_registry.yml" + _write_registry(registry_path) + + report = generate_mapping_diff_report( + registry_path, + { + "normalization": [ + {"counterparty": "UNKNOWN broker"}, + {"counterparty": "Citigroup"}, + ], + "reconciliation": {"counterparties_in_data": ["UNKNOWN broker"]}, + }, + ) + + lines = report.splitlines() + unmapped_start = lines.index("UNMAPPED") + fallback_start = lines.index("FALLBACK_MAPPED") + suggestions_start = lines.index("SUGGESTIONS") + + unmapped_lines = lines[unmapped_start + 1 : fallback_start - 1] + fallback_lines = lines[fallback_start + 1 : suggestions_start - 1] + suggestion_lines = lines[suggestions_start + 1 :] + + assert unmapped_lines == ["UNKNOWN broker"] + assert fallback_lines == ["Citigroup -> Citibank"] + assert suggestion_lines == ["UNKNOWN broker -> Unknown Broker"] diff --git a/tests/test_mapping_diff_report_cli.py b/tests/test_mapping_diff_report_cli.py index c6cd7f45..71d8c239 100644 --- a/tests/test_mapping_diff_report_cli.py +++ b/tests/test_mapping_diff_report_cli.py @@ -48,6 +48,20 @@ def test_mapping_diff_report_missing_registry_exits_nonzero(tmp_path: Path) -> N assert len(result.stderr.strip().splitlines()) == 1 +def test_mapping_diff_report_default_registry_missing_mentions_config_path(tmp_path: Path) -> None: + result = subprocess.run( + _cli_cmd(), + check=False, + capture_output=True, + text=True, + env=_cli_env(), + cwd=tmp_path, + ) + assert result.returncode != 0 + assert "config/name_registry.yml" in result.stderr + assert len(result.stderr.strip().splitlines()) == 1 + + def test_mapping_diff_report_unreadable_registry_exits_nonzero(tmp_path: Path) -> None: registry_path = tmp_path / "name_registry.yml" registry_path.write_text("schema_version: 1\nentries: []\n", encoding="utf-8") From 10c1e15712fc540a1b1dfaa381a835bcc8b87ca9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:42:36 +0000 Subject: [PATCH 14/16] chore(codex-keepalive): apply updates (PR #228) --- src/counter_risk/pipeline/run.py | 34 ++++++++-- .../test_reconcile_series_coverage.py | 65 +++++++++++++++++++ 2 files changed, 95 insertions(+), 4 deletions(-) diff --git a/src/counter_risk/pipeline/run.py b/src/counter_risk/pipeline/run.py index 0807781f..0fda448a 100644 --- a/src/counter_risk/pipeline/run.py +++ b/src/counter_risk/pipeline/run.py @@ -18,7 +18,7 @@ from counter_risk.config import WorkflowConfig, load_config from counter_risk.dates import derive_as_of_date, derive_run_date -from counter_risk.normalize import normalize_counterparty +from counter_risk.normalize import normalize_counterparty, resolve_counterparty from counter_risk.parsers import parse_fcm_totals, parse_futures_detail from counter_risk.pipeline.manifest import ManifestBuilder from counter_risk.pipeline.parsing_types import ( @@ -144,6 +144,7 @@ def reconcile_series_coverage( normalized_counterparties_in_data = _normalized_counterparties_from_parsed_data( parsed_sections ) + counterparty_sources_by_raw_name = _counterparty_sources_from_records(totals_records) clearing_houses_in_data = sorted( { value @@ -226,13 +227,28 @@ def reconcile_series_coverage( raw_display = ", ".join(raw_names) warnings.append( "Reconciliation unmapped counterparty in sheet " - f"{sheet_name!r}: raw={raw_display!r}, normalized={normalized_name!r}" + f"{sheet_name!r}: raw={raw_display!r}, normalized={normalized_name!r}, " + "source=" + + ",".join( + sorted( + { + counterparty_sources_by_raw_name.get(raw_name, "unmapped") + for raw_name in raw_names + } + ) + ) ) unmapped_counterparties.append( { "sheet": sheet_name, "raw_counterparty_labels": raw_names, "normalized_counterparty": normalized_name, + "source": sorted( + { + counterparty_sources_by_raw_name.get(raw_name, "unmapped") + for raw_name in raw_names + } + ), } ) @@ -360,11 +376,21 @@ def _normalized_counterparties_from_records( raw_name = str(record.get("counterparty", "")).strip() if not raw_name: continue - normalized_name = normalize_counterparty(raw_name) - normalized_to_raw.setdefault(normalized_name, set()).add(raw_name) + resolution = resolve_counterparty(raw_name) + normalized_to_raw.setdefault(resolution.canonical_name, set()).add(raw_name) return normalized_to_raw +def _counterparty_sources_from_records(totals_records: list[dict[str, Any]]) -> dict[str, str]: + sources_by_raw_name: dict[str, str] = {} + for record in totals_records: + raw_name = str(record.get("counterparty", "")).strip() + if not raw_name: + continue + sources_by_raw_name[raw_name] = resolve_counterparty(raw_name).source + return sources_by_raw_name + + def _normalized_counterparties_from_parsed_data( parsed_sections: Mapping[str, Any], ) -> dict[str, set[str]]: diff --git a/tests/pipeline/test_reconcile_series_coverage.py b/tests/pipeline/test_reconcile_series_coverage.py index 4f96e371..9a0f905b 100644 --- a/tests/pipeline/test_reconcile_series_coverage.py +++ b/tests/pipeline/test_reconcile_series_coverage.py @@ -3,6 +3,7 @@ from __future__ import annotations from inspect import Parameter, signature +from pathlib import Path import pytest @@ -264,6 +265,70 @@ def test_reconcile_series_coverage_does_not_warn_when_raw_labels_normalize_to_he assert not any("unmapped counterparty" in warning for warning in result["warnings"]) +def test_reconcile_series_coverage_includes_fallback_source_in_unmapped_warning( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + config_dir = tmp_path / "config" + config_dir.mkdir(parents=True) + (config_dir / "name_registry.yml").write_text( + "\n".join( + [ + "schema_version: 1", + "entries:", + " - canonical_key: soc_gen_inc", + " display_name: Soc Gen Inc", + " aliases:", + " - Soc Gen Inc", + ] + ) + + "\n", + encoding="utf-8", + ) + monkeypatch.chdir(tmp_path) + + result = reconcile_series_coverage( + parsed_data_by_sheet={ + "Total": {"totals": [{"counterparty": "Societe Generale"}], "futures": []} + }, + historical_series_headers_by_sheet={"Total": ("Legacy Counterparty",)}, + ) + + assert any("source=fallback" in warning for warning in result["warnings"]) + + +def test_reconcile_series_coverage_includes_registry_source_in_unmapped_warning( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + config_dir = tmp_path / "config" + config_dir.mkdir(parents=True) + (config_dir / "name_registry.yml").write_text( + "\n".join( + [ + "schema_version: 1", + "entries:", + " - canonical_key: soc_gen_inc", + " display_name: Soc Gen Inc", + " aliases:", + " - Societe Generale", + ] + ) + + "\n", + encoding="utf-8", + ) + monkeypatch.chdir(tmp_path) + + result = reconcile_series_coverage( + parsed_data_by_sheet={ + "Total": {"totals": [{"counterparty": "Societe Generale"}], "futures": []} + }, + historical_series_headers_by_sheet={"Total": ("Legacy Counterparty",)}, + ) + + assert any("source=registry" in warning for warning in result["warnings"]) + + def test_normalized_counterparties_from_records_uses_normalization_mapping() -> None: totals_records = [ {"counterparty": "Bank of America, NA"}, From 73196dd35a1a3c8baa120215b71f3ea4ad107729 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:50:12 +0000 Subject: [PATCH 15/16] chore(codex-keepalive): apply updates (PR #228) --- src/counter_risk/normalize.py | 2 ++ tests/test_normalization_registry_first.py | 29 ++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/src/counter_risk/normalize.py b/src/counter_risk/normalize.py index 4cdd3cfc..d52b44dc 100644 --- a/src/counter_risk/normalize.py +++ b/src/counter_risk/normalize.py @@ -61,6 +61,8 @@ def _load_alias_lookup(registry_path: str) -> dict[str, str]: def _build_alias_lookup(registry: NameRegistryConfig) -> dict[str, str]: lookup: dict[str, str] = {} for entry in registry.entries: + lookup[_normalize_whitespace(entry.canonical_key).casefold()] = entry.display_name + lookup[_normalize_whitespace(entry.display_name).casefold()] = entry.display_name for alias in entry.aliases: lookup[_normalize_whitespace(alias).casefold()] = entry.display_name return lookup diff --git a/tests/test_normalization_registry_first.py b/tests/test_normalization_registry_first.py index 2a3a9c8c..e9b8ade6 100644 --- a/tests/test_normalization_registry_first.py +++ b/tests/test_normalization_registry_first.py @@ -8,6 +8,7 @@ import pytest +from counter_risk.normalize import resolve_counterparty from counter_risk.pipeline.run import reconcile_series_coverage from counter_risk.reports.mapping_diff import generate_mapping_diff_report @@ -56,6 +57,34 @@ def test_mapping_diff_report_changes_between_before_and_after_registry_states() assert "Societe Generale -> Soc Gen\n" not in after_report +def test_resolve_counterparty_uses_registry_direct_canonical_match_before_fallback( + tmp_path: Path, +) -> None: + registry_path = tmp_path / "name_registry.yml" + registry_path.write_text( + "\n".join( + [ + "schema_version: 1", + "entries:", + " - canonical_key: soc_gen", + " display_name: Soc Gen", + " aliases:", + " - SG", + ] + ) + + "\n", + encoding="utf-8", + ) + + display_name_match = resolve_counterparty("Soc Gen", registry_path=registry_path) + canonical_key_match = resolve_counterparty("soc_gen", registry_path=registry_path) + + assert display_name_match.canonical_name == "Soc Gen" + assert display_name_match.source == "registry" + assert canonical_key_match.canonical_name == "Soc Gen" + assert canonical_key_match.source == "registry" + + def test_reconciliation_with_after_registry_has_no_societe_generale_warning( caplog: pytest.LogCaptureFixture, tmp_path: Path, From 2e05dba0b7e98416e4f1ebe844ee392c048b8d36 Mon Sep 17 00:00:00 2001 From: Codex Agent Date: Tue, 24 Feb 2026 16:06:09 +0000 Subject: [PATCH 16/16] Unify counterparty resolution maps and canonicalize registry lookup --- src/counter_risk/normalize.py | 8 ++++---- src/counter_risk/pipeline/run.py | 29 ++++++++++++++--------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/counter_risk/normalize.py b/src/counter_risk/normalize.py index 3ba345be..0b8fb5dc 100644 --- a/src/counter_risk/normalize.py +++ b/src/counter_risk/normalize.py @@ -111,10 +111,10 @@ def _load_alias_lookup(registry_path: str) -> dict[str, str]: def _build_alias_lookup(registry: NameRegistryConfig) -> dict[str, str]: lookup: dict[str, str] = {} for entry in registry.entries: - lookup[_normalize_whitespace(entry.canonical_key).casefold()] = entry.display_name - lookup[_normalize_whitespace(entry.display_name).casefold()] = entry.display_name + lookup[canonicalize_name(entry.canonical_key).casefold()] = entry.display_name + lookup[canonicalize_name(entry.display_name).casefold()] = entry.display_name for alias in entry.aliases: - lookup[_normalize_whitespace(alias).casefold()] = entry.display_name + lookup[canonicalize_name(alias).casefold()] = entry.display_name return lookup @@ -125,7 +125,7 @@ def resolve_counterparty( ) -> NameResolution: """Resolve counterparty name with registry-first semantics.""" - normalized = _normalize_whitespace(name) + normalized = canonicalize_name(name) alias_lookup = _load_alias_lookup(str(Path(registry_path).resolve())) registry_match = alias_lookup.get(normalized.casefold()) if registry_match is not None: diff --git a/src/counter_risk/pipeline/run.py b/src/counter_risk/pipeline/run.py index 7a0ccd30..3b06a047 100644 --- a/src/counter_risk/pipeline/run.py +++ b/src/counter_risk/pipeline/run.py @@ -143,10 +143,10 @@ def reconcile_series_coverage( if value } ) - normalized_counterparties_in_data = _normalized_counterparties_from_parsed_data( - parsed_sections - ) - counterparty_sources_by_raw_name = _counterparty_sources_from_records(totals_records) + ( + normalized_counterparties_in_data, + counterparty_sources_by_raw_name, + ) = _counterparty_resolution_maps_from_records(totals_records) clearing_houses_in_data = sorted( { value @@ -372,27 +372,26 @@ def _extract_segments_from_records(parsed_sections: Mapping[str, Any]) -> set[st return segments -def _normalized_counterparties_from_records( +def _counterparty_resolution_maps_from_records( totals_records: list[dict[str, Any]], -) -> dict[str, set[str]]: +) -> tuple[dict[str, set[str]], dict[str, str]]: normalized_to_raw: dict[str, set[str]] = {} + sources_by_raw_name: dict[str, str] = {} for record in totals_records: raw_name = str(record.get("counterparty", "")).strip() if not raw_name: continue resolution = resolve_counterparty(raw_name) normalized_to_raw.setdefault(resolution.canonical_name, set()).add(raw_name) - return normalized_to_raw + sources_by_raw_name[raw_name] = resolution.source + return normalized_to_raw, sources_by_raw_name -def _counterparty_sources_from_records(totals_records: list[dict[str, Any]]) -> dict[str, str]: - sources_by_raw_name: dict[str, str] = {} - for record in totals_records: - raw_name = str(record.get("counterparty", "")).strip() - if not raw_name: - continue - sources_by_raw_name[raw_name] = resolve_counterparty(raw_name).source - return sources_by_raw_name +def _normalized_counterparties_from_records( + totals_records: list[dict[str, Any]], +) -> dict[str, set[str]]: + normalized_to_raw, _ = _counterparty_resolution_maps_from_records(totals_records) + return normalized_to_raw def _normalized_counterparties_from_parsed_data(