diff --git a/CHANGELOG.md b/CHANGELOG.md index f2d1fa1..ac8997a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,30 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht ## [Unreleased] +## [0.47.0] - 2026-05-31 + +**Theme: tenant isolation across the evidence path. The reference server can no +longer leak one tenant's audit chain to another, and tenant identity is now part +of the tamper-evident hash chain itself.** + +### Security +- Tenant identity is now bound into the audit hash chain. `AuditRecord` carries + a `chain_version`; records written from this release on (chain v2) fold + `tenant_id` into `compute_hash`, so re-attributing a record to another tenant + after the fact breaks `verify_chain()` instead of passing silently. Pre-v0.47 + records (chain v1) keep `tenant_id` out of the hash and re-verify byte for + byte, so existing trails and signed exports stay valid. The SQLite store gains + a `chain_version` column (schema v4) with a migration defaulting legacy rows to + v1. The standalone verifier mirrors the same rule. +- The reference HTTP server's audit-chain read (`GET /v1/audit/actions/{id}/chain`) + is now tenant-scoped: a caller can no longer read another tenant's action chain + by guessing an `action_id`. Unknown and cross-tenant actions both return 404 + with an identical body, so the response is not an existence oracle. The scoped + read also resolves chain positions in one pass, removing an O(n^2) lookup. +- SSE notification broadcast is now tenant-scoped: upstream-pushed notifications + on a shared upstream no longer fan out across tenants. Unattributable log + notifications (no progressToken) broadcast only within a single tenant scope. + ## [0.46.0] - 2026-05-31 **Theme: multi-tenant runtime governance, made real. 
A hardening release that diff --git a/clients/ts/package.json b/clients/ts/package.json index 9c10e6f..5dab962 100644 --- a/clients/ts/package.json +++ b/clients/ts/package.json @@ -1,6 +1,6 @@ { "name": "@vaara/client", - "version": "0.46.0", + "version": "0.47.0", "mcpName": "io.github.vaaraio/vaara", "description": "TypeScript client for the Vaara HTTP API. Conformal risk scoring, hash-chained audit, policy reload, named detectors.", "main": "dist/index.js", diff --git a/pyproject.toml b/pyproject.toml index 9f990cc..43641b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "vaara" -version = "0.46.0" +version = "0.47.0" description = "Tamper-evident runtime evidence layer for AI agents: conformal risk scoring, hash-chained audit trails, and signed attestation plus execution receipts per MCP tool call" requires-python = ">=3.10" license = "Apache-2.0" diff --git a/server-vaara-server.json b/server-vaara-server.json index fa38f32..c753ad3 100644 --- a/server-vaara-server.json +++ b/server-vaara-server.json @@ -8,13 +8,13 @@ "url": "https://github.com/vaaraio/vaara", "source": "github" }, - "version": "0.46.0", + "version": "0.47.0", "packages": [ { "registryType": "pypi", "registryBaseUrl": "https://pypi.org", "identifier": "vaara", - "version": "0.46.0", + "version": "0.47.0", "runtimeHint": "uvx", "transport": { "type": "stdio" diff --git a/server.json b/server.json index 8b11de0..e01ebce 100644 --- a/server.json +++ b/server.json @@ -8,13 +8,13 @@ "url": "https://github.com/vaaraio/vaara", "source": "github" }, - "version": "0.46.0", + "version": "0.47.0", "packages": [ { "registryType": "pypi", "registryBaseUrl": "https://pypi.org", "identifier": "vaara", - "version": "0.46.0", + "version": "0.47.0", "runtimeHint": "uvx", "transport": { "type": "stdio" diff --git a/src/vaara/__init__.py b/src/vaara/__init__.py index 8045de4..6efb4b3 100644 --- a/src/vaara/__init__.py +++ 
b/src/vaara/__init__.py @@ -6,7 +6,7 @@ oversight. """ -__version__ = "0.46.0" +__version__ = "0.47.0" from vaara.pipeline import InterceptionPipeline, InterceptionResult diff --git a/src/vaara/audit/sqlite_backend.py b/src/vaara/audit/sqlite_backend.py index 06cd253..85f5181 100644 --- a/src/vaara/audit/sqlite_backend.py +++ b/src/vaara/audit/sqlite_backend.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) -SCHEMA_VERSION = 3 +SCHEMA_VERSION = 4 def _scrub_nonfinite(obj: Any) -> Any: @@ -81,7 +81,11 @@ def _strict_json_dumps(obj: Any) -> str: system_operation TEXT, data_usage TEXT, decision_making TEXT, - limitations TEXT + limitations TEXT, + -- v0.47 schema v4: hash-chain format version. Records written before + -- tenant binding carry 1 (tenant_id NOT in the hash); v2+ bind it. + -- Kept last so the column index matches the ALTER in _MIGRATIONS[3]. + chain_version INTEGER NOT NULL DEFAULT 1 ); CREATE INDEX IF NOT EXISTS idx_action_id ON audit_records(action_id); @@ -140,6 +144,13 @@ def _strict_json_dumps(obj: Any) -> str: ALTER TABLE audit_records ADD COLUMN decision_making TEXT; ALTER TABLE audit_records ADD COLUMN limitations TEXT; """, + # v3 to v4: hash-chain format version (v0.47). Existing rows default to + # 1 — their record_hash was computed without tenant_id and stays valid + # (NOT re-hashed on load), so chain verification of historical records + # keeps passing. New records are written with chain_version=2. 
+ 3: """ + ALTER TABLE audit_records ADD COLUMN chain_version INTEGER NOT NULL DEFAULT 1; + """, } @@ -340,11 +351,12 @@ def write_record(self, record: AuditRecord) -> None: (record_id, action_id, event_type, timestamp, agent_id, tool_name, data, regulatory, previous_hash, record_hash, seq, tenant_id, - system_operation, data_usage, decision_making, limitations) + system_operation, data_usage, decision_making, limitations, + chain_version) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, COALESCE((SELECT MAX(seq) FROM audit_records), -1) + 1, - ?, ?, ?, ?, ? + ?, ?, ?, ?, ?, ? )""", ( record.record_id, @@ -358,14 +370,22 @@ def write_record(self, record: AuditRecord) -> None: record.previous_hash, record.record_hash, # Per-record tenant_id wins so a single backend instance - # can serve a multi-tenant runtime (v0.40+). Empty record - # tenant_id falls back to instance scope for the legacy - # single-tenant init path. - record.tenant_id or self._tenant_id, + # can serve a multi-tenant runtime (v0.40+). For chain v2+ + # tenant_id is bound into record_hash, so the stored value + # MUST equal the hashed value or the chain won't re-verify + # on reload — the instance-scope substitution is therefore + # confined to legacy v1 records, where tenant_id was never + # hashed (keeps tenant-scoped backends tagging old-style + # empty-tenant writes; see test_purge_is_tenant_scoped). + record.tenant_id if record.chain_version >= 2 + else (record.tenant_id or self._tenant_id), record.system_operation, record.data_usage, record.decision_making, record.limitations, + # Persist the chain format so the tenant binding (or its + # absence on legacy records) re-verifies on reload. + record.chain_version, ), ) @@ -661,28 +681,35 @@ def list_redactions(self) -> list[dict]: def _row_to_record(self, row: tuple) -> AuditRecord: """Convert a database row to an AuditRecord, applying GDPR redactions. 
- Column layout (schema v3): + Column layout (schema v4): row[0..9] record_id, action_id, event_type, timestamp, agent_id, tool_name, data, regulatory, previous_hash, record_hash row[10] seq row[11] tenant_id row[12..15] system_operation, data_usage, decision_making, limitations + row[16] chain_version Pre-v0.6 records (migrated from schema v2) carry NULL for the transparency-taxonomy columns. Their original record_hash was computed without those fields and stays valid — we do NOT - re-hash on load. + re-hash on load. Likewise pre-v0.47 records carry chain_version 1 + (the migration default), so tenant_id stays outside their hash and + the chain re-verifies exactly as written. """ agent_id = row[4] if self._redaction_cache and agent_id in self._redaction_cache: agent_id = self._redaction_cache[agent_id] # Defensive indexing: rows from older queries may not include - # the v3 columns. Use a guard so loading old DBs still works. + # later-schema columns. Use a guard so loading old DBs still works. tenant_id = row[11] if len(row) > 11 else "" sys_op = row[12] if len(row) > 12 else None data_use = row[13] if len(row) > 13 else None dec_mk = row[14] if len(row) > 14 else None lims = row[15] if len(row) > 15 else None + # chain_version drives whether tenant_id is part of compute_hash; + # default 1 (tenant outside hash) for any short/legacy row so the + # reconstructed record re-hashes to its stored record_hash. 
+ chain_version = row[16] if len(row) > 16 and row[16] is not None else 1 return AuditRecord( record_id=row[0], action_id=row[1], @@ -699,6 +726,7 @@ def _row_to_record(self, row: tuple) -> AuditRecord: data_usage=data_use, decision_making=dec_mk, limitations=lims, + chain_version=chain_version, ) # ── Backup ──────────────────────────────────────────────────── diff --git a/src/vaara/audit/trail.py b/src/vaara/audit/trail.py index 50c32f9..6bd42b0 100644 --- a/src/vaara/audit/trail.py +++ b/src/vaara/audit/trail.py @@ -234,6 +234,17 @@ class RegulatoryArticle: # ── Audit Record ────────────────────────────────────────────────────────── +# Hash-chain format version stamped on every newly appended record. +# v1 (legacy): tenant_id is NOT part of compute_hash() — preserves +# re-verification of pre-v0.47 trails written before tenant binding. +# v2: tenant_id and chain_version ARE bound into the hash, so a record +# cannot be silently re-attributed to another tenant (or downgraded +# to v1 to strip the binding) without breaking the chain. +# Records loaded from storage keep their own stored chain_version; only +# AuditTrail._append stamps the current version on fresh records. +_CURRENT_CHAIN_VERSION = 2 + + @dataclass class AuditRecord: """A single immutable audit event in the trail. @@ -259,8 +270,12 @@ class AuditRecord: decision_making: Optional[str] = None limitations: Optional[str] = None # v0.40: multi-tenant scoping. Empty string = single-tenant deployment. - # Excluded from compute_hash() to preserve pre-v0.40 chain re-verification. + # Bound into compute_hash() from chain v2 (v0.47+); see chain_version. tenant_id: str = "" + # Hash-chain format version (see _CURRENT_CHAIN_VERSION). Defaults to 1 + # so records deserialized from pre-v0.47 storage (which carry no + # chain_version column/key) re-hash exactly as originally written. + chain_version: int = 1 def __post_init__(self) -> None: # Loaded-from-DB records carry a non-empty record_hash. 
Skip @@ -305,6 +320,16 @@ def compute_hash(self) -> str: "regulatory_articles": self.regulatory_articles, "previous_hash": self.previous_hash, } + # Chain v2 (v0.47+): bind tenant_id into the tamper-evident surface + # so a record cannot be silently re-attributed to another tenant. + # chain_version is bound too, so a downgrade to v1 (which would drop + # the tenant binding) also breaks the chain. v1 records omit both + # keys and hash exactly as pre-v0.47 — old trails re-verify byte for + # byte. The gate is >= 2 so a future v3 keeps binding these unless it + # deliberately changes the scheme. + if self.chain_version >= 2: + content["tenant_id"] = self.tenant_id + content["chain_version"] = self.chain_version # NOTE on transparency taxonomy (v0.6): # The four prEN ISO/IEC 12792 fields (system_operation, data_usage, # decision_making, limitations) are NOT included in the hash. They @@ -993,6 +1018,33 @@ def get_action_trail(self, action_id: str) -> list[AuditRecord]: with self._lock: return list(self._by_action.get(action_id, [])) + def get_action_chain_scoped( + self, action_id: str, tenant_id: str = "" + ) -> list[tuple[int, AuditRecord]]: + """Tenant-scoped chain read for the reference server. + + Returns ``(chain_position, record)`` pairs for ``action_id``, but only + the records whose ``tenant_id`` matches the caller's ``tenant_id``. A + caller scoped to one tenant can never read another tenant's records, + and the empty-string tenant (single-tenant deployments) only ever sees + empty-tenant records. Positions are resolved in a single pass under the + lock rather than an ``O(n)`` ``index()`` per record. + + Returns an empty list both when the action is unknown and when it + belongs to a different tenant — the caller maps both to 404, so a + cross-tenant probe cannot use the response to confirm an action_id + exists for another tenant. 
+ """ + want = tenant_id or "" + with self._lock: + if action_id not in self._by_action: + return [] + return [ + (pos, r) + for pos, r in enumerate(self._records) + if r.action_id == action_id and (r.tenant_id or "") == want + ] + def get_agent_records( self, agent_id: str, limit: int = 100 ) -> list[AuditRecord]: @@ -1098,6 +1150,11 @@ def _append(self, record: AuditRecord) -> None: record.data = {str(k): json_safe(v) for k, v in record.data.items()} with self._lock: record.previous_hash = self._last_hash + # Stamp the current chain format on every fresh record so its + # tenant_id is bound into the hash. Records reloaded from storage + # never pass through _append, so their stored version is left + # intact and old trails keep re-verifying. + record.chain_version = _CURRENT_CHAIN_VERSION record.record_hash = record.compute_hash() self._last_hash = record.record_hash diff --git a/src/vaara/audit/verify.py b/src/vaara/audit/verify.py index 4466a6d..7ac653f 100644 --- a/src/vaara/audit/verify.py +++ b/src/vaara/audit/verify.py @@ -99,7 +99,8 @@ def _verify_chain_bytes(trail_bytes: bytes) -> Optional[str]: f"record {idx}: previous_hash mismatch " f"(expected {prev_hash!r}, got {rec.get('previous_hash')!r})" ) - # Recompute hash over the canonical content. + # Recompute hash over the canonical content. Mirrors + # AuditRecord.compute_hash — keep the two in lockstep. content = { "record_id": rec.get("record_id"), "action_id": rec.get("action_id"), @@ -111,6 +112,13 @@ def _verify_chain_bytes(trail_bytes: bytes) -> Optional[str]: "regulatory_articles": rec.get("regulatory_articles", []), "previous_hash": prev_hash, } + # Chain v2 (v0.47+) binds tenant_id and chain_version into the hash. + # Records with no chain_version key are legacy v1 and omit both, so + # pre-v0.47 trails re-verify unchanged. 
+ chain_version = rec.get("chain_version", 1) + if isinstance(chain_version, int) and chain_version >= 2: + content["tenant_id"] = rec.get("tenant_id", "") + content["chain_version"] = chain_version canonical = json.dumps( content, sort_keys=True, separators=(",", ":"), allow_nan=False ) diff --git a/src/vaara/integrations/_mcp_notify.py b/src/vaara/integrations/_mcp_notify.py index d05e22f..801bdf4 100644 --- a/src/vaara/integrations/_mcp_notify.py +++ b/src/vaara/integrations/_mcp_notify.py @@ -38,6 +38,7 @@ def deliver( *, session_id: Optional[str] = None, upstream: str = "default", + tenant: Optional[str] = None, ) -> None: ... @@ -54,6 +55,7 @@ def deliver( *, session_id: Optional[str] = None, upstream: str = "default", + tenant: Optional[str] = None, ) -> None: with self._lock: sys.stdout.write(strict_json_dumps(message) + "\n") @@ -121,9 +123,13 @@ def close(self) -> None: class HttpRouter: """Per-session SSE notification delivery for Streamable HTTP transport. - ``deliver`` with a session_id targets exactly that session; without one - (log notifications carry no progressToken) it broadcasts across every - registered session on the matching upstream. + ``deliver`` with a session_id targets exactly that session. Without one, + a ``tenant``-attributed broadcast (a progress notification correlated to + a tools/call) reaches only that tenant's sessions on the upstream; an + unattributed broadcast (a server-level log with no progressToken) reaches + every session on the upstream only when they all share one tenant scope, + and is otherwise suppressed so one tenant's upstream push cannot leak to + another tenant subscribed to the same upstream. 
""" def __init__(self, replay_buffer_size: int = _DEFAULT_REPLAY_BUFFER) -> None: @@ -184,15 +190,37 @@ def deliver( *, session_id: Optional[str] = None, upstream: str = "default", + tenant: Optional[str] = None, ) -> None: with self._lock: if session_id is not None: state = self._sessions.get(session_id) targets = [state] if state is not None else [] else: - targets = [ + candidates = [ s for s in self._sessions.values() if s.upstream == upstream ] + if tenant is not None: + # Attributable broadcast (progress correlated to a + # tools/call): only the originating tenant's sessions. + targets = [s for s in candidates if s.tenant == tenant] + else: + # Unattributable broadcast (a server-level log with no + # progressToken). Safe only when every subscriber on this + # upstream shares one tenant scope; across distinct tenants + # it would leak one tenant's upstream push to another, so + # suppress rather than fan out. + scopes = {s.tenant for s in candidates} + if len(scopes) <= 1: + targets = candidates + else: + logger.debug( + "suppressing unattributable broadcast on upstream " + "%s spanning %d tenant scopes", + upstream, + len(scopes), + ) + targets = [] for state in targets: state.enqueue(message) diff --git a/src/vaara/integrations/mcp_proxy.py b/src/vaara/integrations/mcp_proxy.py index 9455d88..f8e8aec 100644 --- a/src/vaara/integrations/mcp_proxy.py +++ b/src/vaara/integrations/mcp_proxy.py @@ -1343,6 +1343,7 @@ def _on_upstream_notification(self, upstream_name: str, message: dict) -> None: # subscribed to this upstream (HttpRouter handles broadcast when # session_id is None; StdioRouter ignores both args). 
session_id: Optional[str] = None + tenant: Optional[str] = None if method == "notifications/progress": params = message.get("params") if isinstance(message, dict) else None if isinstance(params, dict): @@ -1351,10 +1352,19 @@ def _on_upstream_notification(self, upstream_name: str, message: dict) -> None: with self._inflight_lock: entry = self._inflight_progress.get(token) if entry is not None: + # entry = (action_id, agent_id, tool_name, tenant, session) + tenant = entry[3] captured_session = entry[4] if captured_session: session_id = captured_session - self._router.deliver(message, session_id=session_id, upstream=upstream_name) + # tenant stays None for log notifications (no progressToken), so the + # router suppresses cross-tenant fan-out it cannot attribute. + self._router.deliver( + message, + session_id=session_id, + upstream=upstream_name, + tenant=tenant, + ) @staticmethod def _progress_token(params: dict) -> Any: diff --git a/src/vaara/server/routes.py b/src/vaara/server/routes.py index e04a8ee..1995dbc 100644 --- a/src/vaara/server/routes.py +++ b/src/vaara/server/routes.py @@ -188,9 +188,16 @@ async def append_audit_event( "/v1/audit/actions/{action_id}/chain", response_model=S.AuditChain, ) - async def read_action_chain(action_id: str): - records = state.audit._by_action.get(action_id, []) - if not records: + async def read_action_chain( + action_id: str, + x_vaara_tenant: Optional[str] = Header(default=None, alias="X-Vaara-Tenant"), + ): + tenant_id = (x_vaara_tenant or "").strip() + scoped = state.audit.get_action_chain_scoped(action_id, tenant_id) + if not scoped: + # Unknown action and cross-tenant action both 404 with the same + # message: a caller scoped to one tenant gets no signal that an + # action_id exists for another tenant. 
raise _error( "unknown_action", f"no audit records for {action_id!r}", status.HTTP_404_NOT_FOUND, @@ -201,13 +208,13 @@ async def read_action_chain(action_id: str): S.AuditChainEvent( event_id=r.record_id, event_type=r.event_type.value, - chain_position=state.audit._records.index(r), + chain_position=pos, event_hash=r.record_hash, previous_hash=r.previous_hash, timestamp=_iso(r.timestamp), payload=r.data or {}, ) - for r in records + for pos, r in scoped ], ) diff --git a/tests/test_integrations_mcp_proxy.py b/tests/test_integrations_mcp_proxy.py index 156abbd..acc4823 100644 --- a/tests/test_integrations_mcp_proxy.py +++ b/tests/test_integrations_mcp_proxy.py @@ -508,7 +508,7 @@ def test_progress_notification_forwards_to_client(monkeypatch): forwarded: list[dict] = [] monkeypatch.setattr( p._router, "deliver", - lambda message, *, session_id=None, upstream="default": forwarded.append(message), + lambda message, *, session_id=None, upstream="default", tenant=None: forwarded.append(message), ) msg = { "jsonrpc": "2.0", "method": "notifications/progress", @@ -535,7 +535,7 @@ def test_non_governed_notification_still_forwards_without_audit(monkeypatch): forwarded: list[dict] = [] monkeypatch.setattr( p._router, "deliver", - lambda message, *, session_id=None, upstream="default": forwarded.append(message), + lambda message, *, session_id=None, upstream="default", tenant=None: forwarded.append(message), ) msg = { "jsonrpc": "2.0", "method": "notifications/resources/list_changed", @@ -552,7 +552,7 @@ def test_audit_failure_in_notification_does_not_break_forwarding(monkeypatch): forwarded: list[dict] = [] monkeypatch.setattr( p._router, "deliver", - lambda message, *, session_id=None, upstream="default": forwarded.append(message), + lambda message, *, session_id=None, upstream="default", tenant=None: forwarded.append(message), ) msg = { "jsonrpc": "2.0", "method": "notifications/message", diff --git a/tests/test_mcp_notify.py b/tests/test_mcp_notify.py index 
c5871e6..ad3a582 100644 --- a/tests/test_mcp_notify.py +++ b/tests/test_mcp_notify.py @@ -77,6 +77,57 @@ def test_http_router_deliver_without_session_broadcasts_on_upstream(): loop.close() +def test_http_router_attributed_broadcast_is_tenant_scoped(): + # Two tenants subscribed to the SAME upstream slot. A progress + # notification correlated to tenant t1's call (tenant="t1", no session_id) + # must reach only t1's sessions, never t2's. + router = HttpRouter(replay_buffer_size=10) + loop = _new_loop() + try: + t1 = router.register_session("sess-1", "alpha", "t1", loop) + t2 = router.register_session("sess-2", "alpha", "t2", loop) + msg = {"jsonrpc": "2.0", "method": "notifications/progress"} + router.deliver(msg, session_id=None, upstream="alpha", tenant="t1") + assert t1.replay_since(0) == [(1, msg)] + assert t2.replay_since(0) == [] + finally: + loop.close() + + +def test_http_router_unattributable_broadcast_suppressed_across_tenants(): + # A log notification (no progressToken, tenant=None) on an upstream shared + # by two distinct tenants is suppressed rather than fanned out, so one + # tenant's upstream push cannot leak to another. + router = HttpRouter(replay_buffer_size=10) + loop = _new_loop() + try: + t1 = router.register_session("sess-1", "alpha", "t1", loop) + t2 = router.register_session("sess-2", "alpha", "t2", loop) + msg = {"jsonrpc": "2.0", "method": "notifications/message"} + router.deliver(msg, session_id=None, upstream="alpha", tenant=None) + assert t1.replay_since(0) == [] + assert t2.replay_since(0) == [] + finally: + loop.close() + + +def test_http_router_unattributable_broadcast_ok_within_one_tenant(): + # When every subscriber on the upstream shares one tenant scope, an + # unattributable broadcast is safe and still fans out (single-tenant + # deployments and the empty-tenant default keep their behavior). 
+ router = HttpRouter(replay_buffer_size=10) + loop = _new_loop() + try: + a = router.register_session("sess-1", "alpha", "t1", loop) + b = router.register_session("sess-2", "alpha", "t1", loop) + msg = {"jsonrpc": "2.0", "method": "notifications/message"} + router.deliver(msg, session_id=None, upstream="alpha", tenant=None) + assert a.replay_since(0) == [(1, msg)] + assert b.replay_since(0) == [(1, msg)] + finally: + loop.close() + + def test_http_router_deliver_to_unknown_session_is_noop(): router = HttpRouter(replay_buffer_size=10) loop = _new_loop() diff --git a/tests/test_server.py b/tests/test_server.py index 5bd638a..bb56407 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -132,6 +132,46 @@ def test_audit_chain_unknown_action_404(client): assert r.status_code == 404 +def test_audit_chain_read_is_tenant_scoped(client): + # Tenant A writes a chain for act-iso. + r = client.post( + "/v1/audit/events", + json={ + "event_type": "action_requested", + "action_id": "act-iso", + "agent_id": "a-acme", + "tool_name": "data.read", + "payload": {"secret": "acme-only"}, + }, + headers={"X-Vaara-Tenant": "acme"}, + ) + assert r.status_code == 201, r.text + + # Tenant A can read its own chain. + own = client.get( + "/v1/audit/actions/act-iso/chain", + headers={"X-Vaara-Tenant": "acme"}, + ) + assert own.status_code == 200 + assert len(own.json()["events"]) == 1 + assert own.json()["events"][0]["payload"]["secret"] == "acme-only" + + # Tenant B knows the action_id but must not read it — and gets 404, not + # 403, so the response cannot confirm act-iso exists for another tenant. + other = client.get( + "/v1/audit/actions/act-iso/chain", + headers={"X-Vaara-Tenant": "globex"}, + ) + assert other.status_code == 404 + assert "acme-only" not in other.text + + # A caller with no tenant header (single-tenant scope "") is likewise + # walled off from a tenant-owned action. 
+ anon = client.get("/v1/audit/actions/act-iso/chain") + assert anon.status_code == 404 + assert "acme-only" not in anon.text + + def test_audit_event_bad_type_400(client): r = client.post("/v1/audit/events", json={ "event_type": "not_a_real_event", diff --git a/tests/test_sqlite_backend.py b/tests/test_sqlite_backend.py index 48bb498..aa86837 100644 --- a/tests/test_sqlite_backend.py +++ b/tests/test_sqlite_backend.py @@ -234,6 +234,7 @@ def _assert_current(self, path: Path) -> None: assert "data_usage" in cols assert "decision_making" in cols assert "limitations" in cols + assert "chain_version" in cols conn.close() def test_preversion_db_migrates(self, db_path): @@ -251,6 +252,59 @@ def test_reopening_current_db_is_idempotent(self, db_path): SQLiteAuditBackend(db_path).close() self._assert_current(db_path) + def test_pre_v047_record_verifies_after_v4_migration(self, db_path): + """A legacy record whose record_hash was computed the v1 way (tenant_id + NOT in the hash) must still re-verify after migrating to schema v4, + even though its tenant_id column is populated. This is the backward- + compat guarantee of the chain_version flag.""" + import sqlite3 + + from vaara.audit.trail import AuditRecord, EventType + + # Hash computed under v1 rules (chain_version defaults to 1). + rec = AuditRecord( + record_id="r1", action_id="a1", + event_type=EventType.ACTION_REQUESTED, timestamp=1.0, + agent_id="agent", tool_name="t", + ) + legacy_hash = rec.compute_hash() + assert rec.chain_version == 1 + + # Seed a schema-v3 DB (tenant + transparency cols, NO chain_version) + # with that record, tenant_id populated in the column. 
+ conn = sqlite3.connect(str(db_path), isolation_level=None) + conn.executescript( + self._V0_AUDIT_RECORDS_SQL.replace( + "seq INTEGER NOT NULL\n );", + "seq INTEGER NOT NULL,\n" + " tenant_id TEXT NOT NULL DEFAULT '',\n" + " system_operation TEXT, data_usage TEXT,\n" + " decision_making TEXT, limitations TEXT\n );", + ) + ) + conn.execute("CREATE TABLE audit_meta (key TEXT PRIMARY KEY, value TEXT NOT NULL)") + conn.execute("INSERT INTO audit_meta (key, value) VALUES ('schema_version', '3')") + conn.execute( + "INSERT INTO audit_records (record_id, action_id, event_type, " + "timestamp, agent_id, tool_name, data, regulatory, previous_hash, " + "record_hash, seq, tenant_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?)", + ("r1", "a1", "action_requested", 1.0, "agent", "t", "{}", "[]", + "", legacy_hash, 0, "tenant-a"), + ) + conn.close() + + backend = SQLiteAuditBackend(db_path) # migrates 3 -> 4 + try: + reloaded = backend.load_trail(strict=True) # raises if chain broke + finally: + backend.close() + self._assert_current(db_path) + assert reloaded.verify_chain() is None + loaded = reloaded._records[0] + assert loaded.chain_version == 1 # legacy stays v1 + assert loaded.tenant_id == "tenant-a" # column value preserved + assert loaded.record_hash == legacy_hash # tenant NOT folded into hash + class TestSkeletonRecordsCounter: """Loop 51: load_trail reports skeleton rows via log only; the count diff --git a/tests/test_v040_tenant.py b/tests/test_v040_tenant.py index b24878a..3a856a1 100644 --- a/tests/test_v040_tenant.py +++ b/tests/test_v040_tenant.py @@ -246,8 +246,9 @@ def worker(t: int) -> None: assert trail.verify_chain() is None -def test_audit_record_hash_excludes_tenant_id(): - """tenant_id is NOT part of compute_hash so pre-v0.40 chains re-verify.""" +def test_v1_record_hash_excludes_tenant_id(): + """Chain v1 (legacy default) leaves tenant_id out of the hash, so pre-v0.47 + trails written before tenant binding re-verify byte for byte.""" rec_no = AuditRecord( 
record_id="r1", action_id="a1", event_type=EventType.ACTION_REQUESTED, timestamp=1.0, agent_id="a", tool_name="t", @@ -256,4 +257,120 @@ def test_audit_record_hash_excludes_tenant_id(): record_id="r1", action_id="a1", event_type=EventType.ACTION_REQUESTED, timestamp=1.0, agent_id="a", tool_name="t", tenant_id="tenant-x", ) + assert rec_no.chain_version == 1 assert rec_no.compute_hash() == rec_with.compute_hash() + + +def test_v2_record_hash_binds_tenant_id(): + """Chain v2 binds tenant_id into the hash, so two records differing only + in tenant_id hash differently.""" + rec_a = AuditRecord( + record_id="r1", action_id="a1", event_type=EventType.ACTION_REQUESTED, + timestamp=1.0, agent_id="a", tool_name="t", + tenant_id="tenant-a", chain_version=2, + ) + rec_b = AuditRecord( + record_id="r1", action_id="a1", event_type=EventType.ACTION_REQUESTED, + timestamp=1.0, agent_id="a", tool_name="t", + tenant_id="tenant-b", chain_version=2, + ) + assert rec_a.compute_hash() != rec_b.compute_hash() + + +def test_v1_hash_is_byte_stable_across_versions(): + """The exact v1 hash must never change — a fixed expected digest guards + against any future edit silently breaking pre-v0.47 chain re-verification.""" + import hashlib + import json + + rec = AuditRecord( + record_id="r1", action_id="a1", event_type=EventType.ACTION_REQUESTED, + timestamp=1.0, agent_id="a", tool_name="t", tenant_id="ignored-in-v1", + ) + expected = hashlib.sha256( + json.dumps( + { + "record_id": "r1", "action_id": "a1", + "event_type": "action_requested", "timestamp": 1.0, + "agent_id": "a", "tool_name": "t", "data": {}, + "regulatory_articles": [], "previous_hash": "", + }, + sort_keys=True, separators=(",", ":"), allow_nan=False, + ).encode() + ).hexdigest() + assert rec.compute_hash() == expected + + +def test_trail_stamps_v2_and_detects_tenant_reattribution(): + """A live trail stamps chain v2 on every record, and re-attributing a + record to another tenant breaks verify_chain (the v0.47 fix).""" + 
trail = AuditTrail() + aid = trail.record_action_requested( + ActionRequest( + action_type=_action_type(), tool_name="secret_tool", + agent_id="agent-a", parameters={}, tenant_id="tenant-a", + ) + ) + rec = trail._records[0] + assert rec.chain_version == 2 + assert rec.tenant_id == "tenant-a" + assert trail.verify_chain() is None + + rec.tenant_id = "tenant-b" # simulate store-write re-attribution + assert trail.verify_chain() is not None + assert aid # silence unused + + +def test_v2_records_roundtrip_through_sqlite(tmp_path): + """v2 records persist their chain_version + tenant_id, and the chain + re-verifies on reload (catches the stored-vs-hashed-tenant landmine).""" + from vaara.audit.sqlite_backend import SQLiteAuditBackend + + db = tmp_path / "audit.db" + backend = SQLiteAuditBackend(db) + trail = AuditTrail(on_record=backend.write_record) + try: + for tid in ("tenant-a", "tenant-b", ""): + trail.record_action_requested( + ActionRequest( + action_type=_action_type(), tool_name="t", + agent_id="agent", parameters={}, tenant_id=tid, + ) + ) + finally: + backend.close() + + reopened = SQLiteAuditBackend(db) + try: + reloaded = reopened.load_trail(strict=True) # raises if chain broken + finally: + reopened.close() + assert len(reloaded._records) == 3 + assert [r.chain_version for r in reloaded._records] == [2, 2, 2] + assert [r.tenant_id for r in reloaded._records] == ["tenant-a", "tenant-b", ""] + assert reloaded.verify_chain() is None + + +def test_signed_export_roundtrips_v2_chain(tmp_path): + """The standalone verifier (verify._verify_chain_bytes) must accept a v2 + chain — keeps it in lockstep with AuditRecord.compute_hash.""" + pytest.importorskip("cryptography") + from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey + + from vaara.audit.export import export_signed + from vaara.audit.verify import verify_signed + + trail = AuditTrail() + for tid in ("tenant-a", "tenant-b"): + trail.record_action_requested( + ActionRequest( + 
action_type=_action_type(), tool_name="t", + agent_id="agent", parameters={}, tenant_id=tid, + ) + ) + key = Ed25519PrivateKey.generate() + out = tmp_path / "trail.zip" + export_signed(trail, out, key) + + result = verify_signed(out) + assert result.ok, result.errors