Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- **Multi-tenant schema**: `owner_id` column on all data tables (entries, reads, actions, embeddings) with backfill migration for existing data
- **Users table**: full user schema with email (+ canonical normalization for uniqueness), E.164 phone, argon2id password hash, timezone, preferences JSONB

### Fixed
- **PR label automation**: `Dev Active` is now a proper hold state — `on-push` and `on-ci-pass` skip pipeline transitions while it's present, `on-unlabel` handles promotion when it's removed
- **PR label automation**: `on-ci-pass` no longer fails on force-pushed PRs — `gh api` 404 errors handled gracefully
Expand Down
158 changes: 158 additions & 0 deletions alembic/versions/f1a2b3c4d5e6_add_owner_id_and_users.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# mcp-awareness — ambient system awareness for AI agents
# Copyright (C) 2026 Chris Means
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""add owner_id to all tables and create users table

Revision ID: f1a2b3c4d5e6
Revises: e7b2c4a91d35
Create Date: 2026-03-28 14:00:00.000000

"""

from __future__ import annotations

import getpass
import os
from collections.abc import Sequence

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "f1a2b3c4d5e6"
down_revision: str | Sequence[str] | None = "e7b2c4a91d35"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Default owner resolution order: AWARENESS_DEFAULT_OWNER env var, then the
# OS login name, then the literal 'system'.
def _login_name_or_system() -> str:
    """Return the current login name, or ``"system"`` if it cannot be determined."""
    try:
        return getpass.getuser()
    except Exception:
        return "system"


_fallback_user = _login_name_or_system()
DEFAULT_OWNER = os.environ.get("AWARENESS_DEFAULT_OWNER", _fallback_user)
# Double every single quote so the value can be embedded in a SQL string
# literal (e.g., O'Brien -> O''Brien).
_escaped = "''".join(DEFAULT_OWNER.split("'"))


def upgrade() -> None:
    """Create the ``users`` table and add ``owner_id`` scoping to all data tables.

    Steps, in order:

    1. Create ``users`` if it does not exist.
    2. For each of ``entries``, ``reads``, ``actions`` and ``embeddings``: add
       a nullable ``owner_id`` column, backfill rows where it is NULL with the
       default owner, then set NOT NULL and a column default.
    3. Insert the default owner into ``users`` (skipped on id conflict).
    4. Rebuild the partial unique logical-key index so it includes ``owner_id``.
    5. Drop the un-scoped ``entries`` indexes and create owner-prefixed ones,
       plus an ``owner_id`` index on ``reads``, ``actions`` and ``embeddings``.

    The default owner is the module-level ``_escaped`` value, whose single
    quotes are already doubled so it can be interpolated as a SQL string
    literal. Every statement is guarded with ``IF [NOT] EXISTS`` /
    ``ON CONFLICT`` or is a no-op when repeated, so the upgrade can be re-run
    against a schema that already has some of these objects.
    """
    # --- 1. Create users table ---
    op.execute("""
        CREATE TABLE IF NOT EXISTS users (
            id TEXT PRIMARY KEY,
            email TEXT,
            canonical_email TEXT UNIQUE,
            email_verified BOOLEAN NOT NULL DEFAULT FALSE,
            phone TEXT,
            phone_verified BOOLEAN NOT NULL DEFAULT FALSE,
            password_hash TEXT,
            display_name TEXT,
            timezone TEXT DEFAULT 'UTC',
            preferences JSONB NOT NULL DEFAULT '{}',
            created TIMESTAMPTZ NOT NULL DEFAULT now(),
            updated TIMESTAMPTZ NOT NULL DEFAULT now(),
            deleted TIMESTAMPTZ
        )
    """)

    # --- 2. Add owner_id to every data table ---
    # Nullable first so ADD COLUMN succeeds on populated tables, then backfill,
    # then tighten to NOT NULL and attach the default for future inserts.
    for table in ("entries", "reads", "actions", "embeddings"):
        op.execute(f"ALTER TABLE {table} ADD COLUMN IF NOT EXISTS owner_id TEXT")
        op.execute(f"UPDATE {table} SET owner_id = '{_escaped}' WHERE owner_id IS NULL")
        op.execute(f"ALTER TABLE {table} ALTER COLUMN owner_id SET NOT NULL")
        op.execute(f"ALTER TABLE {table} ALTER COLUMN owner_id SET DEFAULT '{_escaped}'")

    # --- 3. Insert default user ---
    op.execute(f"""
        INSERT INTO users (id) VALUES ('{_escaped}')
        ON CONFLICT (id) DO NOTHING
    """)

    # --- 4. Update unique index on logical_key to include owner_id ---
    # Dropped and recreated under the same name because the column list changes.
    op.execute("DROP INDEX IF EXISTS idx_entries_source_logical_key")
    op.execute("""
        CREATE UNIQUE INDEX idx_entries_source_logical_key
        ON entries(owner_id, source, logical_key)
        WHERE logical_key IS NOT NULL AND deleted IS NULL
    """)

    # --- 5. Add owner_id indexes ---
    # Entries: replace single-column indexes with owner-prefixed versions
    op.execute("DROP INDEX IF EXISTS idx_entries_type")
    op.execute("DROP INDEX IF EXISTS idx_entries_source")
    op.execute("DROP INDEX IF EXISTS idx_entries_type_source")
    op.execute("CREATE INDEX IF NOT EXISTS idx_entries_owner ON entries(owner_id)")
    op.execute(
        "CREATE INDEX IF NOT EXISTS idx_entries_owner_type ON entries(owner_id, type)"
    )
    op.execute(
        "CREATE INDEX IF NOT EXISTS idx_entries_owner_source ON entries(owner_id, source)"
    )
    op.execute(
        "CREATE INDEX IF NOT EXISTS idx_entries_owner_type_source"
        " ON entries(owner_id, type, source)"
    )

    # Reads, actions, embeddings: add owner_id index
    op.execute("CREATE INDEX IF NOT EXISTS idx_reads_owner ON reads(owner_id)")
    op.execute("CREATE INDEX IF NOT EXISTS idx_actions_owner ON actions(owner_id)")
    op.execute("CREATE INDEX IF NOT EXISTS idx_embeddings_owner ON embeddings(owner_id)")


def downgrade() -> None:
    """Undo the upgrade: restore the un-scoped indexes, drop ``owner_id``, drop ``users``.

    Order: drop the owner-scoped indexes, recreate the original ``entries``
    indexes, rebuild the logical-key unique index without ``owner_id``, drop
    the ``owner_id`` column from each data table, then drop ``users``.
    """
    # Remove owner_id indexes (reverse of the order they were created in)
    owner_indexes = (
        "idx_embeddings_owner",
        "idx_actions_owner",
        "idx_reads_owner",
        "idx_entries_owner_type_source",
        "idx_entries_owner_source",
        "idx_entries_owner_type",
        "idx_entries_owner",
    )
    for index_name in owner_indexes:
        op.execute(f"DROP INDEX IF EXISTS {index_name}")

    # Restore the original entries indexes that the upgrade dropped
    original_indexes = (
        ("idx_entries_type", "type"),
        ("idx_entries_source", "source"),
        ("idx_entries_type_source", "type, source"),
    )
    for index_name, columns in original_indexes:
        op.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON entries({columns})")

    # Restore original unique index (without owner_id)
    op.execute("DROP INDEX IF EXISTS idx_entries_source_logical_key")
    op.execute("""
        CREATE UNIQUE INDEX idx_entries_source_logical_key
        ON entries(source, logical_key)
        WHERE logical_key IS NOT NULL AND deleted IS NULL
    """)

    # Remove owner_id columns
    for table in ("embeddings", "actions", "reads", "entries"):
        op.execute(f"ALTER TABLE {table} DROP COLUMN IF EXISTS owner_id")

    # Drop users table
    op.execute("DROP TABLE IF EXISTS users")
46 changes: 41 additions & 5 deletions docs/data-dictionary.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,40 @@

Entry data in mcp-awareness is stored in a single `entries` table using a common envelope pattern. Every record — whether it's a system status report, an alert, a piece of knowledge, or a preference — shares the same columns. The `type` field determines the semantics, and the `data` column holds type-specific fields. Supporting tables (`users`, `reads`, `actions`, `embeddings`) are described in their own sections.

## Table: `users`

| Column | Type | Nullable | Description |
|--------|------|----------|-------------|
| `id` | TEXT | No | Primary key. Owner identifier (e.g., `"cmeans"`, `"alice"`). |
| `email` | TEXT | Yes | Email address as provided by the user (for display, contact). |
| `canonical_email` | TEXT | Yes | Normalized email for uniqueness checks. UNIQUE constraint. See normalization rules below. |
| `email_verified` | BOOLEAN | No | Whether the email has been verified. Default: `FALSE`. |
| `phone` | TEXT | Yes | Phone number in E.164 format (e.g., `"+14155551234"`). NOT unique — shared lines exist. |
| `phone_verified` | BOOLEAN | No | Whether the phone has been verified. Default: `FALSE`. |
| `password_hash` | TEXT | Yes | argon2id hash. Nullable — OAuth-only users skip this. |
| `display_name` | TEXT | Yes | Human-readable name. |
| `timezone` | TEXT | Yes | IANA timezone (e.g., `"America/Chicago"`). Default: `"UTC"`. Used for notification scheduling. |
| `preferences` | JSONB | No | Extensible user settings (notification prefs, etc.). Default: `{}`. |
| `created` | TIMESTAMPTZ | No | When the user was created. Default: `now()`. |
| `updated` | TIMESTAMPTZ | No | Last update timestamp. Default: `now()`. |
| `deleted` | TIMESTAMPTZ | Yes | Soft deletion timestamp. `NULL` means active. |

### Email normalization (`canonical_email`)

The `canonical_email` column is computed on write to prevent trivial multi-account abuse:
1. Lowercase the entire address
2. Strip `+tag` from the local part (e.g., `user+test@gmail.com` → `user@gmail.com`) — applied to every domain, since plus-addressing is a de facto standard
3. Strip dots from the local part for `gmail.com` / `googlemail.com` only (e.g., `u.s.e.r@gmail.com` → `user@gmail.com`)
4. Normalize `googlemail.com` → `gmail.com`

The UNIQUE constraint is on `canonical_email`, not `email`. Users see and use their original email; normalization is invisible.

## Table: `entries`

| Column | Type | Nullable | Description |
|--------|------|----------|-------------|
| `id` | TEXT | No | Primary key. UUID v4, generated via `uuid.uuid4()`. |
| `owner_id` | TEXT | No | Owner identifier. Holds the `users.id` of the user who owns this entry (by convention; no foreign-key constraint is defined). All queries are scoped by `owner_id`. |
| `type` | TEXT | No | Entry type. One of: `status`, `alert`, `pattern`, `suppression`, `context`, `preference`, `note`, `intention`. |
| `source` | TEXT | No | Origin identifier. Describes the subject, not the owner (e.g., `"personal"`, `"synology-nas"`, `"mcp-awareness-project"`). |
| `created` | TIMESTAMPTZ | No | UTC timestamp. Set once when the entry is first created. |
Expand All @@ -21,11 +50,12 @@ All data in mcp-awareness is stored in a single `entries` table using a common e

| Index | Columns | Type | Purpose |
|-------|---------|------|---------|
| `idx_entries_type` | `type` | B-tree | Filter by entry type |
| `idx_entries_source` | `source` | B-tree | Filter by source |
| `idx_entries_type_source` | `type`, `source` | B-tree | Combined filter (e.g., all alerts for a source) |
| `idx_entries_owner` | `owner_id` | B-tree | Filter by owner |
| `idx_entries_owner_type` | `owner_id`, `type` | B-tree | Filter by owner + entry type |
| `idx_entries_owner_source` | `owner_id`, `source` | B-tree | Filter by owner + source |
| `idx_entries_owner_type_source` | `owner_id`, `type`, `source` | B-tree | Combined filter (e.g., all alerts for an owner's source) |
| `idx_entries_tags_gin` | `tags` | GIN | Fast tag containment queries |
| `idx_entries_source_logical_key` | `source`, `logical_key` | Unique (partial) | Upsert deduplication (WHERE logical_key IS NOT NULL) |
| `idx_entries_source_logical_key` | `owner_id`, `source`, `logical_key` | Unique (partial) | Upsert deduplication (WHERE logical_key IS NOT NULL AND deleted IS NULL) |

## Entry types

Expand Down Expand Up @@ -154,6 +184,7 @@ Auto-populated when entries are accessed via `get_knowledge` and `get_alerts`. F
| Column | Type | Nullable | Description |
|--------|------|----------|-------------|
| `id` | SERIAL | No | Auto-incrementing primary key. |
| `owner_id` | TEXT | No | Owner identifier. Denormalized from the entry for direct query scoping and RLS. |
| `entry_id` | TEXT | No | References `entries(id)` with `ON DELETE CASCADE`. |
| `timestamp` | TIMESTAMPTZ | No | When the read occurred. Default: `now()`. |
| `platform` | TEXT | Yes | Which platform performed the read (e.g., `"claude-code"`). |
Expand All @@ -163,6 +194,7 @@ Auto-populated when entries are accessed via `get_knowledge` and `get_alerts`. F

| Index | Columns | Type | Purpose |
|-------|---------|------|---------|
| `idx_reads_owner` | `owner_id` | B-tree | Filter by owner |
| `idx_reads_entry` | `entry_id` | B-tree | Look up reads for a specific entry |
| `idx_reads_timestamp` | `timestamp` | B-tree | Time-range queries |

Expand All @@ -173,6 +205,7 @@ Agent-reported records of concrete actions taken because of an entry. Permanent
| Column | Type | Nullable | Description |
|--------|------|----------|-------------|
| `id` | SERIAL | No | Auto-incrementing primary key. |
| `owner_id` | TEXT | No | Owner identifier. Denormalized from the entry for direct query scoping and RLS. |
| `entry_id` | TEXT | No | References `entries(id)` with `ON DELETE CASCADE`. |
| `timestamp` | TIMESTAMPTZ | No | When the action was recorded. Default: `now()`. |
| `platform` | TEXT | Yes | Which platform reported the action (e.g., `"claude-code"`). |
Expand All @@ -184,6 +217,7 @@ Agent-reported records of concrete actions taken because of an entry. Permanent

| Index | Columns | Type | Purpose |
|-------|---------|------|---------|
| `idx_actions_owner` | `owner_id` | B-tree | Filter by owner |
| `idx_actions_entry` | `entry_id` | B-tree | Look up actions for a specific entry |
| `idx_actions_timestamp` | `timestamp` | B-tree | Time-range queries |
| `idx_actions_tags_gin` | `tags` | GIN | Fast tag containment queries |
Expand All @@ -195,6 +229,7 @@ Stores vector embeddings for semantic search. One embedding per entry per model.
| Column | Type | Nullable | Default | Description |
|--------|------|----------|---------|-------------|
| `id` | `SERIAL` | NO | auto | Row ID |
| `owner_id` | `TEXT` | NO | — | Owner identifier. Denormalized for direct query scoping and RLS. |
| `entry_id` | `TEXT` | NO | — | FK → `entries.id` (`ON DELETE CASCADE`) |
| `model` | `TEXT` | NO | — | Embedding model name (e.g., `nomic-embed-text`) |
| `dimensions` | `INTEGER` | NO | — | Vector dimension count (e.g., 768) |
Expand All @@ -208,6 +243,7 @@ Stores vector embeddings for semantic search. One embedding per entry per model.

| Index | Columns | Type | Purpose |
|-------|---------|------|---------|
| `idx_embeddings_owner` | `owner_id` | B-tree | Filter by owner |
| `idx_embeddings_entry` | `entry_id` | B-tree | Look up embeddings for a specific entry |
| `idx_embeddings_vector_hnsw` | `embedding` | HNSW (`vector_cosine_ops`) | Fast approximate nearest neighbor search |

Expand Down Expand Up @@ -265,4 +301,4 @@ The PostgreSQL backend is designed for a clean migration path to AWS RDS:

---

*[mcp-awareness](https://github.com/cmeans/mcp-awareness) is open source under the [Apache 2.0 License](../LICENSE). Copyright (c) 2026 Chris Means.*
*Part of the [Awareness](https://github.com/cmeans/mcp-awareness) ecosystem. Copyright (c) 2026 Chris Means.*
Loading
Loading