diff --git a/config/name_registry.yml b/config/name_registry.yml index 891dc282..702e7784 100644 --- a/config/name_registry.yml +++ b/config/name_registry.yml @@ -34,6 +34,7 @@ entries: display_name: Soc Gen aliases: - Soc Gen + - Soc Gen Inc - Societe Generale - canonical_key: barclays diff --git a/src/counter_risk/cli/mapping_diff_report.py b/src/counter_risk/cli/mapping_diff_report.py index 51535620..54b95e0e 100644 --- a/src/counter_risk/cli/mapping_diff_report.py +++ b/src/counter_risk/cli/mapping_diff_report.py @@ -34,6 +34,12 @@ def build_parser() -> argparse.ArgumentParser: default=[], help="Raw input name observed during reconciliation. Can be provided multiple times.", ) + parser.add_argument( + "--output-format", + choices=("text",), + default="text", + help="Report output format.", + ) return parser @@ -48,7 +54,11 @@ def main(argv: list[str] | None = None) -> int: "reconciliation": list(args.reconciliation_name), } try: - report = generate_mapping_diff_report(args.registry, input_sources) + report = generate_mapping_diff_report( + args.registry, + input_sources, + output_format=args.output_format, + ) except ValueError as exc: error_line = " ".join(str(exc).splitlines()) print(error_line, file=sys.stderr) diff --git a/src/counter_risk/normalize.py b/src/counter_risk/normalize.py index 0b8fb5dc..181b0d52 100644 --- a/src/counter_risk/normalize.py +++ b/src/counter_risk/normalize.py @@ -144,8 +144,49 @@ def normalize_counterparty(name: str) -> str: return resolve_counterparty(name).canonical_name +def normalize_counterparty_with_source( + name: str, + *, + registry_path: str | Path = Path("config/name_registry.yml"), +) -> NameResolution: + """Normalize a counterparty name and return full mapping metadata. + + The returned :class:`NameResolution` includes ``source`` indicating where + the mapping came from: + - ``"registry"`` when matched via configured name registry entries/aliases. + - ``"fallback"`` when matched via built-in fallback mappings. + - ``"unmapped"`` when no mapping is found and canonicalized input is used. + """ + + return resolve_counterparty(name, registry_path=registry_path) + + +def resolve_clearing_house( + name: str, + *, + registry_path: str | Path = Path("config/name_registry.yml"), +) -> NameResolution: + """Resolve clearing house name with registry-first semantics. + + Clearing-house source attribution is binary for reconciliation reporting: + registry hits are labeled ``"registry"``, and all non-registry paths are + labeled ``"fallback"`` (including identity/no-op normalization). + """ + + normalized = canonicalize_name(name) + alias_lookup = _load_alias_lookup(str(Path(registry_path).resolve())) + registry_match = alias_lookup.get(normalized.casefold()) + if registry_match is not None: + return NameResolution(raw_name=name, canonical_name=registry_match, source="registry") + + fallback_match = _CLEARING_HOUSE_FALLBACK_MAPPINGS.get(normalized) + if fallback_match is not None: + return NameResolution(raw_name=name, canonical_name=fallback_match, source="fallback") + + return NameResolution(raw_name=name, canonical_name=normalized, source="fallback") + + def normalize_clearing_house(name: str) -> str: """Normalize a clearing house name to the canonical historical workbook label.""" - normalized = _normalize_whitespace(name) - return _CLEARING_HOUSE_FALLBACK_MAPPINGS.get(normalized, normalized) + return resolve_clearing_house(name).canonical_name diff --git a/src/counter_risk/reports/mapping_diff.py b/src/counter_risk/reports/mapping_diff.py index b75a5e13..7cf49ebc 100644 --- a/src/counter_risk/reports/mapping_diff.py +++ b/src/counter_risk/reports/mapping_diff.py @@ -114,9 +114,14 @@ def _iter_input_names(input_sources: Mapping[str, Any]) -> Iterable[str]: def generate_mapping_diff_report( registry_path: str | Path, input_sources: Mapping[str, Any], + *, + output_format: str = "text", ) -> str: """Generate a deterministic mapping diff report.""" + if output_format != "text": + raise ValueError(f"Unsupported output format: {output_format}") + # Load once so missing/unreadable/invalid registry is treated as fatal for report generation. load_name_registry(registry_path) diff --git a/tests/fixtures/fallback_mapped_names.csv b/tests/fixtures/fallback_mapped_names.csv new file mode 100644 index 00000000..cef358a6 --- /dev/null +++ b/tests/fixtures/fallback_mapped_names.csv @@ -0,0 +1,4 @@ +raw_name +Societe Generale +Citigroup +Bank of America, NA diff --git a/tests/fixtures/name_registry_after.yml b/tests/fixtures/name_registry_after.yml index c13c49c9..fc31df8a 100644 --- a/tests/fixtures/name_registry_after.yml +++ b/tests/fixtures/name_registry_after.yml @@ -5,3 +5,8 @@ entries: aliases: - Soc Gen Inc - Societe Generale + - canonical_key: jp_morgan_chase + display_name: JPMorgan Chase + aliases: + - J.P. Morgan + - JP Morgan Chase diff --git a/tests/fixtures/unmapped_names.csv b/tests/fixtures/unmapped_names.csv new file mode 100644 index 00000000..f0bcdf17 --- /dev/null +++ b/tests/fixtures/unmapped_names.csv @@ -0,0 +1,3 @@ +raw_name +Unknown House +Zeta Broker diff --git a/tests/test_mapping_diff_report_cli.py b/tests/test_mapping_diff_report_cli.py index 71d8c239..f2988796 100644 --- a/tests/test_mapping_diff_report_cli.py +++ b/tests/test_mapping_diff_report_cli.py @@ -2,12 +2,15 @@ from __future__ import annotations +import csv import os import stat import subprocess import sys from pathlib import Path +from counter_risk.cli import mapping_diff_report + def _cli_cmd() -> list[str]: return [sys.executable, "-m", "counter_risk.cli.mapping_diff_report"] @@ -22,6 +25,13 @@ def _cli_env() -> dict[str, str]: return env +def _load_fixture_names(filename: str) -> list[str]: + fixture_path = Path("tests/fixtures") / filename + with fixture_path.open(newline="", encoding="utf-8") as fixture_file: + reader = csv.DictReader(fixture_file) + return [row["raw_name"] for row in reader] + + def test_mapping_diff_report_help_exits_zero() -> None: result = subprocess.run( [*_cli_cmd(), "--help"], @@ -34,6 +44,20 @@ def test_mapping_diff_report_help_exits_zero() -> None: assert "mapping_diff_report" in result.stdout +def test_mapping_diff_report_with_repo_registry_exits_zero() -> None: + result = subprocess.run( + [*_cli_cmd(), "--registry", "config/name_registry.yml"], + check=False, + capture_output=True, + text=True, + env=_cli_env(), + ) + assert result.returncode == 0 + assert "UNMAPPED" in result.stdout + assert "FALLBACK_MAPPED" in result.stdout + assert "SUGGESTIONS" in result.stdout + + def test_mapping_diff_report_missing_registry_exits_nonzero(tmp_path: Path) -> None: missing_registry = tmp_path / "missing_registry.yml" result = subprocess.run( @@ -119,3 +143,99 @@ def test_mapping_diff_report_deterministic_sections(tmp_path: Path) -> None: assert "UNMAPPED\nUnknown House\n" in first.stdout assert "FALLBACK_MAPPED\nSociete Generale -> Soc Gen\n" in first.stdout assert "SUGGESTIONS\nUnknown House -> Unknown House\n" in first.stdout + + +def test_mapping_diff_report_with_fixture_inputs_contains_required_sections() -> None: + fallback_names = _load_fixture_names("fallback_mapped_names.csv") + unmapped_names = _load_fixture_names("unmapped_names.csv") + + args: list[str] = [*_cli_cmd(), "--registry", "config/name_registry.yml"] + for name in fallback_names + unmapped_names: + args.extend(["--normalization-name", name]) + for name in unmapped_names: + args.extend(["--reconciliation-name", name]) + + result = subprocess.run(args, check=False, capture_output=True, text=True, env=_cli_env()) + + assert result.returncode == 0 + assert "UNMAPPED" in result.stdout + assert "FALLBACK_MAPPED" in result.stdout + assert "SUGGESTIONS" in result.stdout + + +def test_mapping_diff_report_forwards_registry_path_parameter( + tmp_path: Path, + monkeypatch, +) -> None: + captured_call: dict[str, object] = {} + + def _fake_generate_mapping_diff_report( + registry_path: Path, + input_sources: dict[str, list[str]], + *, + output_format: str = "text", + ) -> str: + captured_call["registry_path"] = registry_path + captured_call["input_sources"] = input_sources + captured_call["output_format"] = output_format + return "UNMAPPED\n\nFALLBACK_MAPPED\n\nSUGGESTIONS\n" + + monkeypatch.setattr( + mapping_diff_report, + "generate_mapping_diff_report", + _fake_generate_mapping_diff_report, + ) + registry_path = tmp_path / "name_registry.yml" + registry_path.write_text("schema_version: 1\nentries: []\n", encoding="utf-8") + + exit_code = mapping_diff_report.main(["--registry", str(registry_path)]) + + assert exit_code == 0 + assert captured_call["registry_path"] == registry_path + assert captured_call["input_sources"] == {"normalization": [], "reconciliation": []} + assert captured_call["output_format"] == "text" + + +def test_mapping_diff_report_forwards_output_format_parameter( + tmp_path: Path, + monkeypatch, +) -> None: + captured_call: dict[str, object] = {} + + def _fake_generate_mapping_diff_report( + registry_path: Path, + input_sources: dict[str, list[str]], + *, + output_format: str = "text", + ) -> str: + captured_call["registry_path"] = registry_path + captured_call["input_sources"] = input_sources + captured_call["output_format"] = output_format + return "UNMAPPED\n\nFALLBACK_MAPPED\n\nSUGGESTIONS\n" + + monkeypatch.setattr( + mapping_diff_report, + "generate_mapping_diff_report", + _fake_generate_mapping_diff_report, + ) + registry_path = tmp_path / "name_registry.yml" + registry_path.write_text("schema_version: 1\nentries: []\n", encoding="utf-8") + + exit_code = mapping_diff_report.main( + [ + "--registry", + str(registry_path), + "--output-format", + "text", + "--normalization-name", + "Societe Generale", + ] + ) + + assert exit_code == 0 + assert captured_call["registry_path"] == registry_path + assert captured_call["input_sources"] == { + "normalization": ["Societe Generale"], + "reconciliation": [], + } + assert captured_call["output_format"] == "text" diff --git a/tests/test_normalization_registry_first.py b/tests/test_normalization_registry_first.py index e9b8ade6..7e99f4c9 100644 --- a/tests/test_normalization_registry_first.py +++ b/tests/test_normalization_registry_first.py @@ -8,13 +8,18 @@ import pytest -from counter_risk.normalize import resolve_counterparty +import counter_risk.pipeline.run as pipeline_run +from counter_risk.normalize import ( + normalize_counterparty_with_source, + resolve_clearing_house, + resolve_counterparty, +) from counter_risk.pipeline.run import reconcile_series_coverage from counter_risk.reports.mapping_diff import generate_mapping_diff_report def _fixture_path(name: str) -> Path: - return Path("tests/fixtures") / name + return Path(__file__).resolve().parent / "fixtures" / name def _input_sources() -> dict[str, object]: @@ -85,6 +90,77 @@ def test_resolve_counterparty_uses_registry_direct_canonical_match_before_fallba assert canonical_key_match.source == "registry" +def test_resolve_clearing_house_returns_registry_source_when_name_is_in_registry( + tmp_path: Path, +) -> None: + registry_path = tmp_path / "name_registry.yml" + registry_path.write_text( + "\n".join( + [ + "schema_version: 1", + "entries:", + " - canonical_key: custom_ch", + " display_name: Custom Clearing House", + " aliases:", + " - Custom CH", + ] + ) + + "\n", + encoding="utf-8", + ) + + resolution = resolve_clearing_house("Custom CH", registry_path=registry_path) + + assert resolution.canonical_name == "Custom Clearing House" + assert resolution.source == "registry" + + +def test_resolve_clearing_house_returns_fallback_source_when_registry_has_no_match( + tmp_path: Path, +) -> None: + registry_path = tmp_path / "name_registry.yml" + registry_path.write_text("schema_version: 1\nentries: []\n", encoding="utf-8") + + resolution = resolve_clearing_house("ICE Clear US", registry_path=registry_path) + + assert resolution.canonical_name == "ICE" + assert resolution.source == "fallback" + + +def test_resolve_clearing_house_unknown_name_uses_identity_with_fallback_source( + tmp_path: Path, +) -> None: + registry_path = tmp_path / "name_registry.yml" + registry_path.write_text("schema_version: 1\nentries: []\n", encoding="utf-8") + + resolution = resolve_clearing_house("LCH", registry_path=registry_path) + + assert resolution.canonical_name == "LCH" + assert resolution.source == "fallback" + + +def test_normalize_counterparty_with_source_exposes_source_attribute(tmp_path: Path) -> None: + registry_path = tmp_path / "name_registry.yml" + registry_path.write_text( + "\n".join( + [ + "schema_version: 1", + "entries:", + " - canonical_key: soc_gen", + " display_name: Soc Gen", + " aliases:", + " - SG", + ] + ) + + "\n", + encoding="utf-8", + ) + + resolution = normalize_counterparty_with_source("SG", registry_path=registry_path) + + assert resolution.source == "registry" + + def test_reconciliation_with_after_registry_has_no_societe_generale_warning( caplog: pytest.LogCaptureFixture, tmp_path: Path, @@ -109,3 +185,45 @@ def test_reconciliation_with_after_registry_has_no_societe_generale_warning( assert result["warnings"] assert not any("Societe Generale" in warning for warning in result["warnings"]) assert all("Societe Generale" not in record.getMessage() for record in caplog.records) + + +def test_reconciliation_sources_differ_between_before_and_after_registry( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + def _run_with_fixture(fixture_name: str, run_dir: Path) -> list[str]: + fixture_path = _fixture_path(fixture_name).resolve() + config_dir = run_dir / "config" + config_dir.mkdir(parents=True) + shutil.copyfile( + fixture_path, + config_dir / "name_registry.yml", + ) + monkeypatch.chdir(run_dir) + + captured_sources: list[str] = [] + original = pipeline_run.resolve_counterparty + + def _capture_source(raw_name: str): + resolution = original(raw_name) + captured_sources.append(resolution.source) + return resolution + + monkeypatch.setattr(pipeline_run, "resolve_counterparty", _capture_source) + reconcile_series_coverage( + parsed_data_by_sheet={ + "Total": { + "totals": [{"counterparty": "Societe Generale"}], + "futures": [], + } + }, + historical_series_headers_by_sheet={"Total": ("Soc Gen Inc",)}, + ) + return captured_sources + + before_sources = _run_with_fixture("name_registry_before.yml", tmp_path / "before") + after_sources = _run_with_fixture("name_registry_after.yml", tmp_path / "after") + + assert "fallback" in before_sources + assert "registry" in after_sources + assert before_sources != after_sources