Skip to content
506 changes: 506 additions & 0 deletions .agents/issue-48-ledger.yml

Large diffs are not rendered by default.

84 changes: 84 additions & 0 deletions config/name_registry.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Top-level schema:
# - schema_version: required integer, currently must be 1.
# - entries: required non-empty list of registry entry objects.
# Schema version for forward-compatible validation rules.
schema_version: 1

# Canonical counterparty/series registry used for deterministic name normalization.
entries:
- canonical_key: citibank # Required snake_case key: ^[a-z0-9]+(?:_[a-z0-9]+)*$, unique across entries.
display_name: Citibank # Required canonical series/header label (1-80 chars, workbook-safe punctuation).
aliases: # Required list[str], at least one value; must be unique case-insensitively after whitespace normalization (duplicates are rejected, not silently dropped).
- Citibank
- Citigroup
# Optional per-variant include flags. If omitted, defaults to included for all variants.
series_included:
all_programs: true
ex_trend: true
trend: true

- canonical_key: bank_of_america # canonical_key values must remain globally unique.
display_name: Bank of America
aliases:
- Bank of America
- Bank of America, NA
- Bank of America NA

- canonical_key: goldman_sachs
display_name: Goldman Sachs
aliases:
- Goldman Sachs
- Goldman Sachs Int'l

- canonical_key: soc_gen
display_name: Soc Gen
aliases:
- Soc Gen
- Societe Generale

- canonical_key: barclays
display_name: Barclays
aliases:
- Barclays
- Barclays Bank PLC

- canonical_key: cme
display_name: CME
aliases:
- CME
- CME Clearing House

- canonical_key: ice
display_name: ICE
aliases:
- ICE
- ICE Clear U.S.
- ICE Clear US

- canonical_key: ice_euro
display_name: ICE Euro
aliases:
- ICE Euro
- ICE Clear Europe
series_included:
all_programs: true
ex_trend: true
trend: false

- canonical_key: eurex
display_name: EUREX
aliases:
- EUREX
- EUREX Clearing

- canonical_key: japan_scc
display_name: Japan SCC
aliases:
- Japan SCC
- Japan Securities Clearing Corporation

- canonical_key: korea_exchange
display_name: Korea Exchange
aliases:
- Korea Exchange
- Korea Exchange (in-house)
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ disable_error_code = ["import-untyped"]
module = "counter_risk.mosers.workbook_generation"
disable_error_code = ["import-untyped"]

[[tool.mypy.overrides]]
module = "counter_risk.name_registry"
disable_error_code = ["import-untyped"]

[[tool.mypy.overrides]]
module = "scripts.*"
ignore_errors = true
Expand Down
139 changes: 139 additions & 0 deletions src/counter_risk/name_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""Name registry parsing and validation helpers."""

from __future__ import annotations

import re
from pathlib import Path
from typing import Any, Literal

import yaml
from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator, model_validator

# Lowercase snake_case: one or more runs of [a-z0-9] joined by single underscores,
# so leading, trailing, and doubled underscores are all rejected.
_CANONICAL_KEY_PATTERN = re.compile(r"^[a-z0-9]+(?:_[a-z0-9]+)*$")


def _normalize_alias_token(value: str) -> str:
return " ".join(value.split()).casefold()


class SeriesIncludedFlags(BaseModel):
    """Optional per-variant inclusion flags for a canonical series.

    Each flag defaults to ``True``, so a partially specified ``series_included``
    mapping only needs to list the variants it wants to switch off.
    """

    # Keys other than the three flags below are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    # One flag per variant key accepted under ``series_included`` in the registry.
    all_programs: bool = True
    ex_trend: bool = True
    trend: bool = True


class NameRegistryEntry(BaseModel):
    """One registry row: a canonical key, its display label, and its aliases.

    ``series_included`` is optional and stays ``None`` when the entry does not
    provide it.
    """

    model_config = ConfigDict(extra="forbid")

    canonical_key: str
    aliases: list[str] = Field(min_length=1)
    display_name: str = Field(min_length=1, max_length=80)
    series_included: SeriesIncludedFlags | None = None

    @field_validator("canonical_key")
    @classmethod
    def _validate_canonical_key(cls, value: str) -> str:
        """Accept only lowercase snake_case keys and return them unchanged."""
        if _CANONICAL_KEY_PATTERN.fullmatch(value) is None:
            raise ValueError(
                "canonical_key must match ^[a-z0-9]+(?:_[a-z0-9]+)*$ (snake_case lowercase)."
            )
        return value

    @field_validator("aliases")
    @classmethod
    def _validate_aliases(cls, aliases: list[str]) -> list[str]:
        """Collapse whitespace in each alias and reject blanks and duplicates.

        Duplicates are detected on the case-folded, whitespace-collapsed form;
        the returned list keeps the original letter case and input order.
        """
        seen_tokens: set[str] = set()
        cleaned: list[str] = []

        for raw_alias in aliases:
            if not isinstance(raw_alias, str):
                raise ValueError("aliases entries must be strings.")

            collapsed = " ".join(raw_alias.split())
            if collapsed == "":
                raise ValueError("aliases cannot contain blank values.")

            # Same token the registry-wide collision check uses, so both
            # levels agree on what counts as "the same alias".
            token = _normalize_alias_token(collapsed)
            if token in seen_tokens:
                raise ValueError(
                    f"aliases contains duplicate value after normalization: {collapsed!r}"
                )

            seen_tokens.add(token)
            cleaned.append(collapsed)

        return cleaned

    @field_validator("display_name")
    @classmethod
    def _validate_display_name(cls, value: str) -> str:
        """Return the label with whitespace collapsed; reject whitespace-only labels."""
        collapsed = " ".join(value.split())
        if collapsed:
            return collapsed
        raise ValueError("display_name cannot be blank.")


class NameRegistryConfig(BaseModel):
    """Root document of the name registry: a schema version plus its entries."""

    model_config = ConfigDict(extra="forbid")

    schema_version: Literal[1]
    entries: list[NameRegistryEntry] = Field(min_length=1)

    @model_validator(mode="after")
    def _validate_global_uniqueness(self) -> NameRegistryConfig:
        """Ensure canonical keys are unique and no alias is shared between entries.

        Aliases are compared on their normalized token (whitespace-collapsed,
        case-folded), so two entries cannot claim spellings that differ only in
        case or spacing.
        """
        seen_keys: set[str] = set()
        owner_by_alias: dict[str, str] = {}

        for entry in self.entries:
            key = entry.canonical_key
            if key in seen_keys:
                raise ValueError(f"Duplicate canonical_key found: {key!r}")
            seen_keys.add(key)

            for alias in entry.aliases:
                # The first entry to use a token becomes its owner; any later
                # entry with a different key presenting the same token collides.
                owner = owner_by_alias.setdefault(_normalize_alias_token(alias), key)
                if owner != key:
                    raise ValueError(
                        "Alias collision across entries: "
                        f"{alias!r} maps to both {owner!r} and {key!r}"
                    )

        return self


def _format_registry_validation_error(error: ValidationError) -> str:
lines = ["Name registry validation failed:"]
for issue in error.errors():
location = ".".join(str(part) for part in issue.get("loc", ()))
message = issue.get("msg", "Invalid value")
lines.append(f"- {location}: {message}")
return "\n".join(lines)


def load_name_registry(path: str | Path = Path("config/name_registry.yml")) -> NameRegistryConfig:
    """Load and validate a name registry YAML file from disk.

    Args:
        path: Location of the registry YAML file. The default is a relative
            path, so it is resolved against the current working directory.

    Returns:
        The validated registry configuration.

    Raises:
        ValueError: If the file cannot be read or is not valid UTF-8, if it is
            not valid YAML, if its top level is not a mapping, or if it fails
            schema validation. The original exception is chained as the cause.
    """

    config_path = Path(path)

    # Reading and parsing are separate steps so each failure gets its own
    # message. UnicodeDecodeError is not an OSError, so it must be caught
    # explicitly or a mis-encoded file would escape without the path context.
    try:
        text = config_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        raise ValueError(f"Unable to read name registry file '{config_path}': {exc}") from exc

    try:
        raw = yaml.safe_load(text)
    except yaml.YAMLError as exc:
        raise ValueError(f"Invalid YAML in name registry file '{config_path}': {exc}") from exc

    # An empty document parses to None; treat it as an empty mapping so schema
    # validation reports the missing required fields.
    data: Any = raw if raw is not None else {}
    if not isinstance(data, dict):
        raise ValueError(
            f"Name registry file '{config_path}' must contain a top-level mapping/object."
        )

    try:
        return NameRegistryConfig.model_validate(data)
    except ValidationError as exc:
        raise ValueError(_format_registry_validation_error(exc)) from exc
3 changes: 1 addition & 2 deletions src/counter_risk/parsers/exposure_maturity_schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,7 @@ def _find_header_row_and_map(

missing_text = ", ".join(best_missing)
raise ExposureMaturityColumnsMissingError(
"Missing required headers in exposure maturity worksheet within scan range: "
f"{missing_text}"
f"Missing required headers in exposure maturity worksheet within scan range: {missing_text}"
)


Expand Down
9 changes: 4 additions & 5 deletions src/counter_risk/writers/historical_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,7 @@ def _validate_preserved_wal_cells(
)
if expected.value != value:
raise WorkbookValidationError(
"WAL append changed existing cell value at "
f"row={row_index} column={column_index}"
f"WAL append changed existing cell value at row={row_index} column={column_index}"
)
if expected.number_format != getattr(cell, "number_format", None):
raise WorkbookValidationError(
Expand Down Expand Up @@ -708,9 +707,9 @@ def append_wal_row(
columns=tuple(range(1, preserve_through_column + 1)),
)

worksheet.cell(row=append_target.append_row, column=append_target.date_column).value = (
px_date
)
worksheet.cell(
row=append_target.append_row, column=append_target.date_column
).value = px_date
worksheet.cell(row=append_target.append_row, column=append_target.wal_column).value = float(
wal_value
)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_fixtures_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ def test_fixture_workbooks_and_presentations_open() -> None:
and path.name not in already_validated_fixture_names
)
assert fixture_paths, f"No .pptx/.xlsx fixtures found under {fixtures_root}."
assert (
len(fixture_paths) >= 10
), "Expected representative fixture inventory under tests/fixtures."
assert len(fixture_paths) >= 10, (
"Expected representative fixture inventory under tests/fixtures."
)

workbook_fixtures = [path for path in fixture_paths if path.suffix.lower() == ".xlsx"]
presentation_fixtures = [path for path in fixture_paths if path.suffix.lower() == ".pptx"]
Expand Down
Loading
Loading