Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,30 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [0.47.0] - 2026-05-31

**Theme: tenant isolation across the evidence path. The reference server can no
longer leak one tenant's audit chain to another, and tenant identity is now part
of the tamper-evident hash chain itself.**

### Security
- Tenant identity is now bound into the audit hash chain. `AuditRecord` carries
a `chain_version`; records written from this release on (chain v2) fold
`tenant_id` into `compute_hash`, so re-attributing a record to another tenant
after the fact breaks `verify_chain()` instead of passing silently. Pre-v0.47
records (chain v1) keep `tenant_id` out of the hash and re-verify byte for
byte, so existing trails and signed exports stay valid. The SQLite store gains
a `chain_version` column (schema v4) with a migration defaulting legacy rows to
v1. The standalone verifier mirrors the same rule.
- The reference HTTP server's audit-chain read (`GET /v1/audit/actions/{id}/chain`)
is now tenant-scoped: a caller can no longer read another tenant's action chain
by guessing an `action_id`. Unknown and cross-tenant actions both return 404
with an identical body, so the response is not an existence oracle. The scoped
read also resolves chain positions in one pass, removing an O(n^2) lookup.
- SSE notification broadcast is now tenant-scoped: upstream-pushed notifications
on a shared upstream no longer fan out across tenants. Unattributable log
notifications (no progressToken) broadcast only within a single tenant scope.

## [0.46.0] - 2026-05-31

**Theme: multi-tenant runtime governance, made real. A hardening release that
Expand Down
2 changes: 1 addition & 1 deletion clients/ts/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@vaara/client",
"version": "0.46.0",
"version": "0.47.0",
"mcpName": "io.github.vaaraio/vaara",
"description": "TypeScript client for the Vaara HTTP API. Conformal risk scoring, hash-chained audit, policy reload, named detectors.",
"main": "dist/index.js",
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "vaara"
version = "0.46.0"
version = "0.47.0"
description = "Tamper-evident runtime evidence layer for AI agents: conformal risk scoring, hash-chained audit trails, and signed attestation plus execution receipts per MCP tool call"
requires-python = ">=3.10"
license = "Apache-2.0"
Expand Down
4 changes: 2 additions & 2 deletions server-vaara-server.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
"url": "https://github.com/vaaraio/vaara",
"source": "github"
},
"version": "0.46.0",
"version": "0.47.0",
"packages": [
{
"registryType": "pypi",
"registryBaseUrl": "https://pypi.org",
"identifier": "vaara",
"version": "0.46.0",
"version": "0.47.0",
"runtimeHint": "uvx",
"transport": {
"type": "stdio"
Expand Down
4 changes: 2 additions & 2 deletions server.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
"url": "https://github.com/vaaraio/vaara",
"source": "github"
},
"version": "0.46.0",
"version": "0.47.0",
"packages": [
{
"registryType": "pypi",
"registryBaseUrl": "https://pypi.org",
"identifier": "vaara",
"version": "0.46.0",
"version": "0.47.0",
"runtimeHint": "uvx",
"transport": {
"type": "stdio"
Expand Down
2 changes: 1 addition & 1 deletion src/vaara/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
oversight.
"""

__version__ = "0.46.0"
__version__ = "0.47.0"

from vaara.pipeline import InterceptionPipeline, InterceptionResult

Expand Down
50 changes: 39 additions & 11 deletions src/vaara/audit/sqlite_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

logger = logging.getLogger(__name__)

SCHEMA_VERSION = 3
SCHEMA_VERSION = 4


def _scrub_nonfinite(obj: Any) -> Any:
Expand Down Expand Up @@ -81,7 +81,11 @@ def _strict_json_dumps(obj: Any) -> str:
system_operation TEXT,
data_usage TEXT,
decision_making TEXT,
limitations TEXT
limitations TEXT,
-- v0.47 schema v4: hash-chain format version. Records written before
-- tenant binding carry 1 (tenant_id NOT in the hash); v2+ bind it.
-- Kept last so the column index matches the ALTER in _MIGRATIONS[3].
chain_version INTEGER NOT NULL DEFAULT 1
);

CREATE INDEX IF NOT EXISTS idx_action_id ON audit_records(action_id);
Expand Down Expand Up @@ -140,6 +144,13 @@ def _strict_json_dumps(obj: Any) -> str:
ALTER TABLE audit_records ADD COLUMN decision_making TEXT;
ALTER TABLE audit_records ADD COLUMN limitations TEXT;
""",
# v3 to v4: hash-chain format version (v0.47). Existing rows default to
# 1 — their record_hash was computed without tenant_id and stays valid
# (NOT re-hashed on load), so chain verification of historical records
# keeps passing. New records are written with chain_version=2.
3: """
ALTER TABLE audit_records ADD COLUMN chain_version INTEGER NOT NULL DEFAULT 1;
""",
}


Expand Down Expand Up @@ -340,11 +351,12 @@ def write_record(self, record: AuditRecord) -> None:
(record_id, action_id, event_type, timestamp, agent_id,
tool_name, data, regulatory, previous_hash, record_hash, seq,
tenant_id,
system_operation, data_usage, decision_making, limitations)
system_operation, data_usage, decision_making, limitations,
chain_version)
VALUES (
?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
COALESCE((SELECT MAX(seq) FROM audit_records), -1) + 1,
?, ?, ?, ?, ?
?, ?, ?, ?, ?, ?
)""",
(
record.record_id,
Expand All @@ -358,14 +370,22 @@ def write_record(self, record: AuditRecord) -> None:
record.previous_hash,
record.record_hash,
# Per-record tenant_id wins so a single backend instance
# can serve a multi-tenant runtime (v0.40+). Empty record
# tenant_id falls back to instance scope for the legacy
# single-tenant init path.
record.tenant_id or self._tenant_id,
# can serve a multi-tenant runtime (v0.40+). For chain v2+
# tenant_id is bound into record_hash, so the stored value
# MUST equal the hashed value or the chain won't re-verify
# on reload — the instance-scope substitution is therefore
# confined to legacy v1 records, where tenant_id was never
# hashed (keeps tenant-scoped backends tagging old-style
# empty-tenant writes; see test_purge_is_tenant_scoped).
record.tenant_id if record.chain_version >= 2
else (record.tenant_id or self._tenant_id),
record.system_operation,
record.data_usage,
record.decision_making,
record.limitations,
# Persist the chain format so the tenant binding (or its
# absence on legacy records) re-verifies on reload.
record.chain_version,
),
)

Expand Down Expand Up @@ -661,28 +681,35 @@ def list_redactions(self) -> list[dict]:
def _row_to_record(self, row: tuple) -> AuditRecord:
"""Convert a database row to an AuditRecord, applying GDPR redactions.

Column layout (schema v3):
Column layout (schema v4):
row[0..9] record_id, action_id, event_type, timestamp, agent_id,
tool_name, data, regulatory, previous_hash, record_hash
row[10] seq
row[11] tenant_id
row[12..15] system_operation, data_usage, decision_making, limitations
row[16] chain_version

Pre-v0.6 records (migrated from schema v2) carry NULL for the
transparency-taxonomy columns. Their original record_hash was
computed without those fields and stays valid — we do NOT
re-hash on load.
re-hash on load. Likewise pre-v0.47 records carry chain_version 1
(the migration default), so tenant_id stays outside their hash and
the chain re-verifies exactly as written.
"""
agent_id = row[4]
if self._redaction_cache and agent_id in self._redaction_cache:
agent_id = self._redaction_cache[agent_id]
# Defensive indexing: rows from older queries may not include
# the v3 columns. Use a guard so loading old DBs still works.
# later-schema columns. Use a guard so loading old DBs still works.
tenant_id = row[11] if len(row) > 11 else ""
sys_op = row[12] if len(row) > 12 else None
data_use = row[13] if len(row) > 13 else None
dec_mk = row[14] if len(row) > 14 else None
lims = row[15] if len(row) > 15 else None
# chain_version drives whether tenant_id is part of compute_hash;
# default 1 (tenant outside hash) for any short/legacy row so the
# reconstructed record re-hashes to its stored record_hash.
chain_version = row[16] if len(row) > 16 and row[16] is not None else 1
return AuditRecord(
record_id=row[0],
action_id=row[1],
Expand All @@ -699,6 +726,7 @@ def _row_to_record(self, row: tuple) -> AuditRecord:
data_usage=data_use,
decision_making=dec_mk,
limitations=lims,
chain_version=chain_version,
)

# ── Backup ────────────────────────────────────────────────────
Expand Down
59 changes: 58 additions & 1 deletion src/vaara/audit/trail.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,17 @@ class RegulatoryArticle:

# ── Audit Record ──────────────────────────────────────────────────────────

# Hash-chain format version stamped on every newly appended record.
# v1 (legacy): tenant_id is NOT part of compute_hash() — preserves
# re-verification of pre-v0.47 trails written before tenant binding.
# v2: tenant_id and chain_version ARE bound into the hash, so a record
# cannot be silently re-attributed to another tenant (or downgraded
# to v1 to strip the binding) without breaking the chain.
# Records loaded from storage keep their own stored chain_version; only
# AuditTrail._append stamps the current version on fresh records.
_CURRENT_CHAIN_VERSION = 2


@dataclass
class AuditRecord:
"""A single immutable audit event in the trail.
Expand All @@ -259,8 +270,12 @@ class AuditRecord:
decision_making: Optional[str] = None
limitations: Optional[str] = None
# v0.40: multi-tenant scoping. Empty string = single-tenant deployment.
# Excluded from compute_hash() to preserve pre-v0.40 chain re-verification.
# Bound into compute_hash() from chain v2 (v0.47+); see chain_version.
tenant_id: str = ""
# Hash-chain format version (see _CURRENT_CHAIN_VERSION). Defaults to 1
# so records deserialized from pre-v0.47 storage (which carry no
# chain_version column/key) re-hash exactly as originally written.
chain_version: int = 1

def __post_init__(self) -> None:
# Loaded-from-DB records carry a non-empty record_hash. Skip
Expand Down Expand Up @@ -305,6 +320,16 @@ def compute_hash(self) -> str:
"regulatory_articles": self.regulatory_articles,
"previous_hash": self.previous_hash,
}
# Chain v2 (v0.47+): bind tenant_id into the tamper-evident surface
# so a record cannot be silently re-attributed to another tenant.
# chain_version is bound too, so a downgrade to v1 (which would drop
# the tenant binding) also breaks the chain. v1 records omit both
# keys and hash exactly as pre-v0.47 — old trails re-verify byte for
# byte. The gate is >= 2 so a future v3 keeps binding these unless it
# deliberately changes the scheme.
if self.chain_version >= 2:
content["tenant_id"] = self.tenant_id
content["chain_version"] = self.chain_version
# NOTE on transparency taxonomy (v0.6):
# The four prEN ISO/IEC 12792 fields (system_operation, data_usage,
# decision_making, limitations) are NOT included in the hash. They
Expand Down Expand Up @@ -993,6 +1018,33 @@ def get_action_trail(self, action_id: str) -> list[AuditRecord]:
with self._lock:
return list(self._by_action.get(action_id, []))

def get_action_chain_scoped(
    self, action_id: str, tenant_id: str = ""
) -> list[tuple[int, AuditRecord]]:
    """Return one action's records, restricted to the caller's tenant.

    Each result is a ``(chain_position, record)`` pair, where
    ``chain_position`` is the record's index in ``self._records``. Only
    records whose ``action_id`` equals the requested one AND whose
    ``tenant_id`` equals the caller's ``tenant_id`` are included. A falsy
    tenant on either side is normalised to ``""``, so a caller with no
    tenant only ever matches records with no tenant.

    The positions come from a single ``enumerate`` sweep over
    ``self._records`` performed while holding ``self._lock``, rather than
    a separate ``index()`` lookup per matching record.

    Args:
        action_id: Action whose chain is requested.
        tenant_id: Tenant scope of the caller; ``""`` for single-tenant
            deployments.

    Returns:
        The matching pairs in trail order. The list is empty both when
        ``action_id`` is unknown and when every record for it belongs to
        a different tenant, so the two cases cannot be told apart from
        the result.
    """
    caller_tenant = tenant_id or ""
    scoped = []
    with self._lock:
        # Unknown action: skip the sweep entirely and return the same
        # empty result a cross-tenant request would produce.
        if action_id in self._by_action:
            for position, record in enumerate(self._records):
                if (
                    record.action_id == action_id
                    and (record.tenant_id or "") == caller_tenant
                ):
                    scoped.append((position, record))
    return scoped

def get_agent_records(
self, agent_id: str, limit: int = 100
) -> list[AuditRecord]:
Expand Down Expand Up @@ -1098,6 +1150,11 @@ def _append(self, record: AuditRecord) -> None:
record.data = {str(k): json_safe(v) for k, v in record.data.items()}
with self._lock:
record.previous_hash = self._last_hash
# Stamp the current chain format on every fresh record so its
# tenant_id is bound into the hash. Records reloaded from storage
# never pass through _append, so their stored version is left
# intact and old trails keep re-verifying.
record.chain_version = _CURRENT_CHAIN_VERSION
record.record_hash = record.compute_hash()
self._last_hash = record.record_hash

Expand Down
10 changes: 9 additions & 1 deletion src/vaara/audit/verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ def _verify_chain_bytes(trail_bytes: bytes) -> Optional[str]:
f"record {idx}: previous_hash mismatch "
f"(expected {prev_hash!r}, got {rec.get('previous_hash')!r})"
)
# Recompute hash over the canonical content.
# Recompute hash over the canonical content. Mirrors
# AuditRecord.compute_hash — keep the two in lockstep.
content = {
"record_id": rec.get("record_id"),
"action_id": rec.get("action_id"),
Expand All @@ -111,6 +112,13 @@ def _verify_chain_bytes(trail_bytes: bytes) -> Optional[str]:
"regulatory_articles": rec.get("regulatory_articles", []),
"previous_hash": prev_hash,
}
# Chain v2 (v0.47+) binds tenant_id and chain_version into the hash.
# Records with no chain_version key are legacy v1 and omit both, so
# pre-v0.47 trails re-verify unchanged.
chain_version = rec.get("chain_version", 1)
if isinstance(chain_version, int) and chain_version >= 2:
content["tenant_id"] = rec.get("tenant_id", "")
content["chain_version"] = chain_version
canonical = json.dumps(
content, sort_keys=True, separators=(",", ":"), allow_nan=False
)
Expand Down
Loading