From 860eb25d073bea2dd72c37c405ce36b2dc0a3b2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Thu, 28 May 2026 19:34:43 +0100 Subject: [PATCH] test(server): pin migration invariants + embedding-model literal drift guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stability-audit follow-up. Adds CI guards so two recently-changed schema invariants can't silently regress in a future migration: - migration-invariants.test.ts: asserts the per-user pending oauth_account unique index from #1121 exists (and the old org-wide index is gone), plus a functional contract test — a user's second parallel pending OAuth flow collides while a distinct user's is allowed. Also asserts event_embeddings carries the embedding_model stamp column (#1069/#1080). - embedding-model-literal.test.ts: the legacy-stamp backfill migration hard-codes the model literal; this fails if it ever drifts from DEFAULT_EMBEDDING_MODEL, which would silently re-open the full-corpus recall regression. Exports DEFAULT_EMBEDDING_MODEL for the assertion. --- .../db/migration-invariants.test.ts | 122 ++++++++++++++++++ .../__tests__/embedding-model-literal.test.ts | 23 ++++ packages/server/src/utils/embeddings.ts | 2 +- 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 packages/server/src/__tests__/integration/db/migration-invariants.test.ts create mode 100644 packages/server/src/utils/__tests__/embedding-model-literal.test.ts diff --git a/packages/server/src/__tests__/integration/db/migration-invariants.test.ts b/packages/server/src/__tests__/integration/db/migration-invariants.test.ts new file mode 100644 index 000000000..28abd28d4 --- /dev/null +++ b/packages/server/src/__tests__/integration/db/migration-invariants.test.ts @@ -0,0 +1,122 @@ +/** + * Migration-invariant guard. + * + * The test harness applies the real baseline -> latest migration chain before + * any test runs (setup/test-db.ts). This suite asserts the load-bearing schema + * invariants that recent migrations established, so a future migration that + * silently drops or reshapes one fails CI here rather than in production. + * + * Motivated by the 2-week stability audit: + * - #1121 reshaped pending-auth uniqueness from org-wide to per-user, scoped to + * oauth_account. The functional contract (same user collides, distinct users + * run parallel OAuth flows) is the real invariant — pin it, not just the DDL. + * - #1069/#1080 added event_embeddings.embedding_model; its absence reopens the + * full-corpus recall regression. + */ + +import { beforeAll, describe, expect, it } from 'vitest'; +import { createAuthProfile, PendingAuthConflictError } from '../../../utils/auth-profiles'; +import { cleanupTestDatabase, getTestDb } from '../../setup/test-db'; +import { + addUserToOrganization, + createTestConnectorDefinition, + createTestOrganization, + createTestUser, +} from '../../setup/test-fixtures'; + +describe('migration invariants', () => { + beforeAll(async () => { + await cleanupTestDatabase(); + }); + + describe('schema (DDL applied by the migration chain)', () => { + it('auth_profiles has the per-user pending oauth_account unique index (#1121)', async () => { + const sql = getTestDb(); + const rows = await sql<{ indexdef: string }[]>` + SELECT indexdef FROM pg_indexes + WHERE schemaname = 'public' + AND tablename = 'auth_profiles' + AND indexname = 'auth_profiles_pending_oauth_account_unique' + `; + expect(rows).toHaveLength(1); + const def = rows[0].indexdef; + expect(def).toContain('UNIQUE'); + // per-user, per-connector, per-provider scope + for (const col of ['organization_id', 'connector_key', 'provider', 'created_by']) { + expect(def).toContain(col); + } + // partial predicate: only in-flight oauth_account rows + expect(def).toContain("'pending_auth'"); + expect(def).toContain("'oauth_account'"); + }); + + it('the old org-wide auth_profiles_pending_unique index is gone (#1121)', async () => { + const sql = getTestDb(); + const rows = await sql` + SELECT 1 FROM pg_indexes + WHERE schemaname = 'public' + AND tablename = 'auth_profiles' + AND indexname = 'auth_profiles_pending_unique' + `; + expect(rows).toHaveLength(0); + }); + + it('event_embeddings carries the embedding_model stamp column (#1069/#1080)', async () => { + const sql = getTestDb(); + const rows = await sql` + SELECT 1 FROM information_schema.columns + WHERE table_schema = 'public' + AND table_name = 'event_embeddings' + AND column_name = 'embedding_model' + `; + expect(rows).toHaveLength(1); + }); + }); + + describe('functional contract: pending oauth_account uniqueness is per-user (#1121)', () => { + let orgId: string; + let userA: string; + let userB: string; + const connectorKey = 'invariant-oauth-connector'; + const provider = 'google'; + + beforeAll(async () => { + const org = await createTestOrganization({ name: 'Migration Invariant Org' }); + orgId = org.id; + const a = await createTestUser({ email: 'invariant-user-a@example.com' }); + const b = await createTestUser({ email: 'invariant-user-b@example.com' }); + userA = a.id; + userB = b.id; + await addUserToOrganization(userA, orgId, 'member'); + await addUserToOrganization(userB, orgId, 'member'); + await createTestConnectorDefinition({ + key: connectorKey, + name: 'Invariant OAuth', + organization_id: orgId, + }); + }); + + async function createPending(createdBy: string) { + return createAuthProfile({ + organizationId: orgId, + connectorKey, + displayName: 'Invariant pending', + profileKind: 'oauth_account', + provider, + status: 'pending_auth', + createdBy, + }); + } + + it('a user cannot open two parallel pending flows for the same connector', async () => { + await createPending(userA); + await expect(createPending(userA)).rejects.toBeInstanceOf(PendingAuthConflictError); + }); + + it('distinct users CAN run pending flows for the same connector in parallel', async () => { + const profile = await createPending(userB); + expect(profile.status).toBe('pending_auth'); + expect(profile.created_by).toBe(userB); + }); + }); +}); diff --git a/packages/server/src/utils/__tests__/embedding-model-literal.test.ts b/packages/server/src/utils/__tests__/embedding-model-literal.test.ts new file mode 100644 index 000000000..4129ccd23 --- /dev/null +++ b/packages/server/src/utils/__tests__/embedding-model-literal.test.ts @@ -0,0 +1,23 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { DEFAULT_EMBEDDING_MODEL } from '../embeddings'; + +/** + * The legacy-stamp backfill migration hard-codes the embedding model literal + * (it cannot import TS). If DEFAULT_EMBEDDING_MODEL ever changes without the + * migration's literal tracking it, legacy rows get stamped with the wrong model + * and silently drop out of the model-scoped vector search — the exact + * full-corpus recall regression #1069/#1080 fixed. Pin the two together. + */ +const MIGRATIONS_DIR = path.resolve(__dirname, '../../../../../db/migrations'); +const BACKFILL_MIGRATION = '20260526170000_backfill_legacy_embedding_model_stamp.sql'; + +describe('embedding model literal drift guard', () => { + it('backfill migration stamps NULL rows with DEFAULT_EMBEDDING_MODEL', () => { + const sql = fs.readFileSync(path.join(MIGRATIONS_DIR, BACKFILL_MIGRATION), 'utf-8'); + const match = sql.match(/SET embedding_model = '([^']+)'/); + expect(match, 'backfill UPDATE literal not found in migration').not.toBeNull(); + expect(match?.[1]).toBe(DEFAULT_EMBEDDING_MODEL); + }); +}); diff --git a/packages/server/src/utils/embeddings.ts b/packages/server/src/utils/embeddings.ts index 6a2e9aa03..60f1c12f1 100644 --- a/packages/server/src/utils/embeddings.ts +++ b/packages/server/src/utils/embeddings.ts @@ -14,7 +14,7 @@ import logger from '../utils/logger'; const EMBEDDING_DIMENSIONS = 768; // Must match @lobu/embeddings DEFAULT_MODEL_NAME (the local pipeline + service // fall back to this when EMBEDDINGS_MODEL is unset). -const DEFAULT_EMBEDDING_MODEL = 'Xenova/bge-base-en-v1.5'; +export const DEFAULT_EMBEDDING_MODEL = 'Xenova/bge-base-en-v1.5'; // Allowlist mirroring the embeddings service's MODEL_NAME_PATTERN. Used to // reject anything unsafe before inlining the configured model into SQL. const MODEL_NAME_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._/:-]{0,127}$/;