From 1685f9c628792dfbfbebbe99c4ab3de3c6d9bf33 Mon Sep 17 00:00:00 2001 From: Chris Means Date: Mon, 30 Mar 2026 00:20:50 -0500 Subject: [PATCH] Fix critical audit findings: FORCE RLS, owner-scoped UPDATEs, canonical email, auto_provision default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit C1: FORCE ROW LEVEL SECURITY — new migration so table owner can't bypass RLS C2: Add owner_id to WHERE clause in update_entry, upsert_alert_update, upsert_preference_update SQL — prevents cross-tenant updates C3: OAuth auto-provisioning and identity linking now use canonical_email (handles Gmail dot/+tag variants), extract canonical_email to helpers.py C4: AuthMiddleware.auto_provision defaults to False (was True) Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 6 +++ alembic/versions/j5e6f7g8h9i0_force_rls.py | 51 +++++++++++++++++++ src/mcp_awareness/cli.py | 14 ++--- src/mcp_awareness/helpers.py | 16 ++++++ src/mcp_awareness/middleware.py | 2 +- src/mcp_awareness/postgres_store.py | 18 +++++-- src/mcp_awareness/sql/create_user_auto.sql | 9 ++-- src/mcp_awareness/sql/link_oauth_identity.sql | 7 +-- src/mcp_awareness/sql/update_entry.sql | 4 +- src/mcp_awareness/sql/upsert_alert_update.sql | 4 +- .../sql/upsert_preference_update.sql | 4 +- tests/test_oauth.py | 18 +++++++ 12 files changed, 123 insertions(+), 30 deletions(-) create mode 100644 alembic/versions/j5e6f7g8h9i0_force_rls.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dba91d..4b0bf1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **PR label automation**: added explicit `checks: read` permission - **PR label automation**: `on-ci-pass` now finds PRs from dependabot and other non-default branches by falling back to head branch search when the `pull_requests` array is empty +### Security +- **FORCE ROW LEVEL SECURITY**: RLS policies now enforced on table owner role — previously `ENABLE` without `FORCE` allowed the connection pool role to bypass all policies +- **UPDATE SQL owner scoping**: `update_entry`, `upsert_alert_update`, `upsert_preference_update` now include `AND owner_id = %s` in WHERE clause — prevents cross-tenant updates +- **OAuth canonical_email matching**: auto-provisioning and identity linking now use `canonical_email` (strips Gmail dots/+tags) — prevents duplicate accounts from email variants +- **AuthMiddleware default**: `auto_provision` parameter defaults to `False` (was `True`) — prevents accidental auto-provisioning when instantiated directly + ### Documentation - **Auth setup guide** (`docs/auth-setup.md`): JWT authentication, OAuth 2.1, CLI tools reference, user provisioning, WorkOS walkthrough, known limitations - **README**: auth/OAuth env vars tables, CLI tools, security section rewritten (4-layer table), test count 383→490, removed stale "not yet implemented" auth line diff --git a/alembic/versions/j5e6f7g8h9i0_force_rls.py b/alembic/versions/j5e6f7g8h9i0_force_rls.py new file mode 100644 index 0000000..3e92412 --- /dev/null +++ b/alembic/versions/j5e6f7g8h9i0_force_rls.py @@ -0,0 +1,51 @@ +# mcp-awareness — ambient system awareness for AI agents +# Copyright (C) 2026 Chris Means +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""enforce RLS on table owner — FORCE ROW LEVEL SECURITY + +Without FORCE, the table owner role (which is the connection pool role) +bypasses all RLS policies. This migration adds FORCE so policies apply +to all roles, including the owner. + +Revision ID: j5e6f7g8h9i0 +Revises: i4d5e6f7g8h9 +Create Date: 2026-03-30 00:00:00.000000 + +""" + +from __future__ import annotations + +from collections.abc import Sequence + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "j5e6f7g8h9i0" +down_revision: str | Sequence[str] | None = "i4d5e6f7g8h9" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + +_TABLES = ["entries", "reads", "actions", "embeddings"] + + +def upgrade() -> None: + for table in _TABLES: + op.execute(f"ALTER TABLE {table} FORCE ROW LEVEL SECURITY") + + +def downgrade() -> None: + for table in _TABLES: + op.execute(f"ALTER TABLE {table} NO FORCE ROW LEVEL SECURITY") diff --git a/src/mcp_awareness/cli.py b/src/mcp_awareness/cli.py index 6b20945..b5a69cd 100644 --- a/src/mcp_awareness/cli.py +++ b/src/mcp_awareness/cli.py @@ -42,17 +42,9 @@ def _parse_duration(s: str) -> timedelta: def _canonical_email(email: str) -> str: """Normalize email for uniqueness: strip +tags, dots for gmail, lowercase.""" - email = email.lower().strip() - local, _, domain = email.partition("@") - if not domain: - return email - # Strip +tag - local = local.split("+")[0] - # Gmail/Googlemail: strip dots - if domain in ("gmail.com", "googlemail.com"): - local = local.replace(".", "") - domain = "gmail.com" - return f"{local}@{domain}" + from .helpers import canonical_email + + return canonical_email(email) def _validate_phone(phone: str) -> str: diff --git a/src/mcp_awareness/helpers.py b/src/mcp_awareness/helpers.py index 95e1fea..4db1667 100644 --- a/src/mcp_awareness/helpers.py +++ b/src/mcp_awareness/helpers.py @@ -39,6 +39,22 @@ DEFAULT_QUERY_LIMIT = 200 + +def canonical_email(email: str) -> str: + """Normalize email for uniqueness: strip +tags, dots for gmail, lowercase.""" + email = email.lower().strip() + local, _, domain = email.partition("@") + if not domain: + return email + # Strip +tag + local = local.split("+")[0] + # Gmail/Googlemail: strip dots + if domain in ("gmail.com", "googlemail.com"): + local = local.replace(".", "") + domain = "gmail.com" + return f"{local}@{domain}" + + _VALID_ENTRY_TYPES = [e.value for e in EntryType] diff --git a/src/mcp_awareness/middleware.py b/src/mcp_awareness/middleware.py index 40e6805..624fc0d 100644 --- a/src/mcp_awareness/middleware.py +++ b/src/mcp_awareness/middleware.py @@ -139,7 +139,7 @@ def __init__( jwt_secret: str, algorithm: str = "HS256", oauth_validator: object | None = None, - auto_provision: bool = True, + auto_provision: bool = False, resource_metadata_url: str = "", ) -> None: self.app = app diff --git a/src/mcp_awareness/postgres_store.py b/src/mcp_awareness/postgres_store.py index d96cbf5..95b312c 100644 --- a/src/mcp_awareness/postgres_store.py +++ b/src/mcp_awareness/postgres_store.py @@ -270,7 +270,7 @@ def upsert_alert( self._set_rls_context(cur, owner_id) cur.execute( _load_sql("upsert_alert_update"), - (now, json.dumps(e.tags), json.dumps(e.data), e.id), + (now, json.dumps(e.tags), json.dumps(e.data), e.id, owner_id), ) return e entry = Entry( @@ -307,7 +307,7 @@ def upsert_preference( self._set_rls_context(cur, owner_id) cur.execute( _load_sql("upsert_preference_update"), - (now, json.dumps(e.tags), json.dumps(e.data), e.id), + (now, json.dumps(e.tags), json.dumps(e.data), e.id, owner_id), ) return e entry = Entry( @@ -545,6 +545,7 @@ def update_entry(self, owner_id: str, entry_id: str, updates: dict[str, Any]) -> json.dumps(entry.tags), json.dumps(entry.data), entry.id, + owner_id, ), ) return entry @@ -1190,10 +1191,13 @@ def create_user_if_not_exists( oauth_issuer: str | None = None, ) -> None: """Auto-provision a user on first OAuth login. No-op if user exists.""" + from .helpers import canonical_email + + canon = canonical_email(email) if email else None with self._pool.connection() as conn, conn.transaction(), conn.cursor() as cur: cur.execute( _load_sql("create_user_auto"), - (user_id, email, display_name, oauth_subject, oauth_issuer), + (user_id, email, canon, display_name, oauth_subject, oauth_issuer), ) def get_user_by_oauth(self, oauth_issuer: str, oauth_subject: str) -> dict[str, Any] | None: @@ -1203,17 +1207,21 @@ def get_user_by_oauth(self, oauth_issuer: str, oauth_subject: str) -> dict[str, return cur.fetchone() def link_oauth_identity(self, oauth_subject: str, oauth_issuer: str, email: str) -> str | None: - """Link an OAuth identity to a pre-provisioned user matched by email. + """Link an OAuth identity to a pre-provisioned user matched by canonical email. Returns the user ID if linked, None if no matching user found. Only links if the user's oauth_subject is currently NULL (first-time link). + Uses canonical_email for matching (handles Gmail dot/+tag variants). """ + from .helpers import canonical_email + + canon = canonical_email(email) with ( self._pool.connection() as conn, conn.transaction(), conn.cursor(row_factory=dict_row) as cur, ): - cur.execute(_load_sql("link_oauth_identity"), (oauth_subject, oauth_issuer, email)) + cur.execute(_load_sql("link_oauth_identity"), (oauth_subject, oauth_issuer, canon)) row = cur.fetchone() return str(row["id"]) if row else None diff --git a/src/mcp_awareness/sql/create_user_auto.sql b/src/mcp_awareness/sql/create_user_auto.sql index 03b1b13..eb02071 100644 --- a/src/mcp_awareness/sql/create_user_auto.sql +++ b/src/mcp_awareness/sql/create_user_auto.sql @@ -1,7 +1,8 @@ /* name: create_user_auto */ /* mode: literal */ /* Auto-provision a user on first OAuth login. No-op if user already exists. - Params: user_id, email, display_name, oauth_subject, oauth_issuer */ -INSERT INTO users (id, email, display_name, oauth_subject, oauth_issuer, created) -VALUES (%s, %s, %s, %s, %s, now()) -ON CONFLICT (id) DO NOTHING + Handles both id and canonical_email uniqueness conflicts. + Params: user_id, email, canonical_email, display_name, oauth_subject, oauth_issuer */ +INSERT INTO users (id, email, canonical_email, display_name, oauth_subject, oauth_issuer, created) +VALUES (%s, %s, %s, %s, %s, %s, now()) +ON CONFLICT DO NOTHING diff --git a/src/mcp_awareness/sql/link_oauth_identity.sql b/src/mcp_awareness/sql/link_oauth_identity.sql index a6505d7..70b61e3 100644 --- a/src/mcp_awareness/sql/link_oauth_identity.sql +++ b/src/mcp_awareness/sql/link_oauth_identity.sql @@ -1,10 +1,11 @@ /* name: link_oauth_identity */ /* mode: literal */ -/* Link an OAuth identity to an existing user found by email. +/* Link an OAuth identity to an existing user found by canonical email. Sets oauth_subject and oauth_issuer on first OAuth login. Only updates if oauth_subject is currently NULL (first-time link). - Params: oauth_subject, oauth_issuer, email */ + Uses canonical_email for matching (handles Gmail dot/+tag variants). + Params: oauth_subject, oauth_issuer, canonical_email */ UPDATE users SET oauth_subject = %s, oauth_issuer = %s, updated = now() -WHERE email = %s AND oauth_subject IS NULL AND deleted IS NULL +WHERE canonical_email = %s AND oauth_subject IS NULL AND deleted IS NULL RETURNING id diff --git a/src/mcp_awareness/sql/update_entry.sql b/src/mcp_awareness/sql/update_entry.sql index 420b958..4c3d312 100644 --- a/src/mcp_awareness/sql/update_entry.sql +++ b/src/mcp_awareness/sql/update_entry.sql @@ -3,6 +3,6 @@ /* Update a knowledge entry's mutable fields (source, tags, data with changelog). Used for note, pattern, context, preference types only — status/alert/suppression are immutable. Python-side computes the changelog diff before calling this. - Params: updated, source, tags (jsonb), data (jsonb), id + Params: updated, source, tags (jsonb), data (jsonb), id, owner_id */ -UPDATE entries SET updated = %s, source = %s, tags = %s::jsonb, data = %s::jsonb WHERE id = %s +UPDATE entries SET updated = %s, source = %s, tags = %s::jsonb, data = %s::jsonb WHERE id = %s AND owner_id = %s diff --git a/src/mcp_awareness/sql/upsert_alert_update.sql b/src/mcp_awareness/sql/upsert_alert_update.sql index a0d4849..f98d988 100644 --- a/src/mcp_awareness/sql/upsert_alert_update.sql +++ b/src/mcp_awareness/sql/upsert_alert_update.sql @@ -1,6 +1,6 @@ /* name: upsert_alert_update */ /* mode: literal */ /* Update an existing alert entry's tags and data during upsert. - Params: updated, tags (jsonb), data (jsonb), id + Params: updated, tags (jsonb), data (jsonb), id, owner_id */ -UPDATE entries SET updated = %s, tags = %s::jsonb, data = %s::jsonb WHERE id = %s +UPDATE entries SET updated = %s, tags = %s::jsonb, data = %s::jsonb WHERE id = %s AND owner_id = %s diff --git a/src/mcp_awareness/sql/upsert_preference_update.sql b/src/mcp_awareness/sql/upsert_preference_update.sql index 0b55cf1..59cc571 100644 --- a/src/mcp_awareness/sql/upsert_preference_update.sql +++ b/src/mcp_awareness/sql/upsert_preference_update.sql @@ -1,6 +1,6 @@ /* name: upsert_preference_update */ /* mode: literal */ /* Update an existing preference entry's tags and data during upsert. - Params: updated, tags (jsonb), data (jsonb), id + Params: updated, tags (jsonb), data (jsonb), id, owner_id */ -UPDATE entries SET updated = %s, tags = %s::jsonb, data = %s::jsonb WHERE id = %s +UPDATE entries SET updated = %s, tags = %s::jsonb, data = %s::jsonb WHERE id = %s AND owner_id = %s diff --git a/tests/test_oauth.py b/tests/test_oauth.py index 4b9735f..78d2f73 100644 --- a/tests/test_oauth.py +++ b/tests/test_oauth.py @@ -455,6 +455,15 @@ async def capture_send(msg: dict[str, Any]) -> None: class TestAutoProvisionIntegration: """Integration test: middleware auto-provision with a real store.""" + @pytest.fixture(autouse=True) + def _cleanup_integration_users(self, store: Any) -> Any: + yield + with store._pool.connection() as conn, conn.transaction(), conn.cursor() as cur: + cur.execute( + "DELETE FROM users WHERE id IN " + "('integration-user', 'linked-alice', 'cli-bob', 'failing-user')" + ) + @pytest.mark.anyio async def test_ensure_user_creates_record(self, store: Any, monkeypatch: Any) -> None: """_ensure_user calls store.create_user_if_not_exists through the server module.""" @@ -702,6 +711,15 @@ def test_build_oauth_validator_returns_validator_with_issuer(self) -> None: class TestAutoProvisioning: + @pytest.fixture(autouse=True) + def _cleanup_oauth_users(self, store: Any) -> Any: + yield + # Clean up OAuth test users that aren't covered by conftest clear(TEST_OWNER) + with store._pool.connection() as conn, conn.transaction(), conn.cursor() as cur: + cur.execute( + "DELETE FROM users WHERE id LIKE 'oauth-%' OR id LIKE 'pre-%' OR id LIKE 'linked-%'" + ) + def test_create_user_with_oauth_identity(self, store: Any) -> None: """Auto-provisioning stores OAuth identity fields.""" store.create_user_if_not_exists(