diff --git a/packages/cli/scripts/build.cjs b/packages/cli/scripts/build.cjs index 3f4664660..adf8df087 100644 --- a/packages/cli/scripts/build.cjs +++ b/packages/cli/scripts/build.cjs @@ -35,6 +35,16 @@ if (fs.existsSync(providersSrc)) { // compiles them on demand when a workspace installs or runs one. copyDirIfExists("../connectors/src", "dist/connectors"); +// Vendor the precomputed catalog manifest the server build emits next to its +// own bundled connectors (.catalog-manifest.json). With it, `lobu run` serves +// the connector picker without compiling every connector on demand. CI builds +// the server first, so it's present; if absent (local CLI build without +// build:server) the runtime falls back to on-demand compilation — no regression. +const catalogManifestSrc = "../server/dist/connectors/.catalog-manifest.json"; +if (fs.existsSync(catalogManifestSrc) && fs.existsSync("dist/connectors")) { + fs.cpSync(catalogManifestSrc, "dist/connectors/.catalog-manifest.json"); +} + // Copy database migrations for the bundled PGlite local server. copyDirIfExists("../../db/migrations", "dist/db/migrations"); diff --git a/packages/server/package.json b/packages/server/package.json index fff83dd33..8c36a6d3a 100644 --- a/packages/server/package.json +++ b/packages/server/package.json @@ -12,7 +12,7 @@ "dev": "tsx watch --ignore=../web/** --ignore=../owletto/** --ignore=../../node_modules/** src/server.ts", "dev:local": "tsx watch --ignore=../web/** --ignore=../owletto/** --ignore=../../node_modules/** src/server.ts", "start": "tsx src/server.ts", - "build:server": "node ./scripts/build-server-bundle.mjs", + "build:server": "node ./scripts/build-server-bundle.mjs && bun ./scripts/build-connector-catalog-manifest.ts", "test": "vitest", "test:gateway": "bun test src/gateway", "test:sandbox-runtime": "SKIP_TEST_DB_SETUP=1 vitest run src/__tests__/integration/sandbox/run-script-runtime.test.ts", diff --git a/packages/server/scripts/build-connector-catalog-manifest.ts b/packages/server/scripts/build-connector-catalog-manifest.ts new file mode 100644 index 000000000..b62176d91 --- /dev/null +++ b/packages/server/scripts/build-connector-catalog-manifest.ts @@ -0,0 +1,38 @@ +/** + * Build-time generator for the connector catalog manifest. Compiles every + * bundled connector once and writes dist/connectors/.catalog-manifest.json so + * the server serves the catalog without recompiling on demand (see + * CATALOG_MANIFEST_FILENAME in connector-catalog.ts for the why). + * + * Runs after build-server-bundle.mjs (which copies the sources into + * dist/connectors). Executed under `bun` so it can import the TS catalog code. + */ +import { existsSync } from 'node:fs'; +import { writeFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { + CATALOG_MANIFEST_FILENAME, + generateCatalogManifest, +} from '../src/utils/connector-catalog'; + +const here = dirname(fileURLToPath(import.meta.url)); +const connectorsDir = join(here, '..', 'dist', 'connectors'); + +if (!existsSync(connectorsDir)) { + console.warn( + `[catalog-manifest] ${connectorsDir} missing; skipping (run build:server first).` + ); + process.exit(0); +} + +const start = Date.now(); +const manifest = await generateCatalogManifest(connectorsDir); +const manifestPath = join(connectorsDir, CATALOG_MANIFEST_FILENAME); +await writeFile(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, 'utf-8'); + +const total = Object.keys(manifest.entries).length; +const connectors = Object.values(manifest.entries).filter(Boolean).length; +console.log( + `\n=== connector catalog manifest: ${connectors} connectors / ${total} files -> ${manifestPath} (${Date.now() - start}ms)` +); diff --git a/packages/server/src/__tests__/integration/connectors/sync-run-orphan-feed.test.ts b/packages/server/src/__tests__/integration/connectors/sync-run-orphan-feed.test.ts new file mode 100644 index 000000000..10b026ca6 --- /dev/null +++ b/packages/server/src/__tests__/integration/connectors/sync-run-orphan-feed.test.ts @@ -0,0 +1,58 @@ +/** + * createSyncRun orphan-feed handling (#1012). + * + * A feed whose connector resolves to a definition + version row that has no + * compiled code and no bundled source file (the prod `chrome.tabs` state) can + * never run. Pre-fix, createSyncRun threw on every CheckDueFeeds tick, storming + * the logs with a per-poll error and never making progress. It must instead + * soft-delete the orphan feed (mirroring the no-definition path) so it stops + * appearing in CheckDueFeeds. + */ +import { beforeEach, describe, expect, it } from 'vitest'; +import type { Env } from '../../../index'; +import { createSyncRun } from '../../../utils/queue-helpers'; +import { cleanupTestDatabase, getTestDb } from '../../setup/test-db'; +import { + createTestConnection, + createTestConnectorDefinition, + createTestOrganization, +} from '../../setup/test-fixtures'; + +describe('createSyncRun orphan-feed handling (#1012)', () => { + beforeEach(async () => { + await cleanupTestDatabase(); + }); + + it('soft-deletes a feed whose connector has no compiled code and no bundled source instead of throwing', async () => { + const sql = getTestDb(); + const org = await createTestOrganization(); + + // Definition + version exist (so this is NOT the no-definition orphan path)… + await createTestConnectorDefinition({ + key: 'orphan.no_code', + name: 'Orphan No Code', + organization_id: org.id, + }); + // …but the version carries no runnable code, and the key is not a bundled + // connector — exactly the prod `chrome.tabs` state that threw every poll. + await sql`UPDATE connector_versions SET compiled_code = NULL WHERE connector_key = 'orphan.no_code'`; + + const conn = await createTestConnection({ + organization_id: org.id, + connector_key: 'orphan.no_code', + }); + const [feed] = await sql`SELECT id FROM feeds WHERE connection_id = ${conn.id}`; + const feedId = Number((feed as { id: number }).id); + + // Pre-fix: threw "has no compiled code and no bundled source file". + const runId = await createSyncRun(feedId, {} as Env, sql); + expect(runId).toBeNull(); + + const [after] = await sql`SELECT deleted_at FROM feeds WHERE id = ${feedId}`; + expect((after as { deleted_at: Date | null }).deleted_at).not.toBeNull(); + + // No run row was created for the orphan feed. + const runs = await sql`SELECT id FROM runs WHERE feed_id = ${feedId}`; + expect(runs.length).toBe(0); + }); +}); diff --git a/packages/server/src/utils/connector-catalog.ts b/packages/server/src/utils/connector-catalog.ts index 1f308caae..a36190708 100644 --- a/packages/server/src/utils/connector-catalog.ts +++ b/packages/server/src/utils/connector-catalog.ts @@ -1,6 +1,6 @@ import { existsSync } from 'node:fs'; -import { readdir, stat } from 'node:fs/promises'; -import { extname, relative, resolve } from 'node:path'; +import { readdir, readFile, stat } from 'node:fs/promises'; +import { extname, join, relative, resolve, sep } from 'node:path'; import { fileURLToPath, pathToFileURL } from 'node:url'; import { createConnectorCompiler, @@ -43,7 +43,7 @@ type CachedMetadata = } | undefined; -type ExtractedConnectorCatalogMetadata = { +export type ExtractedConnectorCatalogMetadata = { key: string; name: string; description: string | null; @@ -234,6 +234,129 @@ async function extractConnectorCatalogMetadata( } } +const CATALOG_MANIFEST_VERSION = 1; + +/** + * Filename of the build-time catalog manifest written next to the bundled + * connector sources (see `scripts/build-connector-catalog-manifest.ts`). It maps + * each connector file (path relative to the catalog dir, POSIX separators) to its + * already-extracted metadata, so the runtime can serve the bundled catalog + * WITHOUT compiling ~35 connectors on demand. That cold per-pod scan (esbuild + + * a forked subprocess per connector, run serially) overran the request timeout + * on freshly-rolled, CPU-limited prod replicas and returned 503 to the "Add a + * connection" picker, which then rendered an empty "No connectors found". + * + * Files NOT covered by the manifest (custom `CONNECTOR_CATALOG_URIS` dirs, a + * missing/stale/corrupt manifest) still fall back to on-demand compilation, so + * the dynamic runtime path is fully preserved. + */ +export const CATALOG_MANIFEST_FILENAME = '.catalog-manifest.json'; + +export interface CatalogManifest { + version: number; + // null = file carries no ConnectorRuntime class (utility/index file). Recorded + // so the runtime doesn't recompile it just to rediscover it's not a connector. + entries: Record; +} + +// mtime-keyed so a regenerated manifest (dev) is picked up, and a known-bad +// manifest isn't re-warned on every request. +const manifestCache = new Map< + string, + { mtimeMs: number; entries: CatalogManifest['entries'] | null } +>(); + +// Manifests are keyed by POSIX-relative path so a manifest built on Linux (CI) +// matches lookups on any runtime OS; mismatches simply fall back to compilation. +function toPosixRelative(dirPath: string, filePath: string): string { + return relative(dirPath, filePath).split(sep).join('/'); +} + +async function loadCatalogManifest(dirPath: string): Promise { + const manifestPath = join(dirPath, CATALOG_MANIFEST_FILENAME); + let mtimeMs: number; + try { + mtimeMs = (await stat(manifestPath)).mtimeMs; + } catch { + return null; // no manifest → on-demand compilation path + } + + const cached = manifestCache.get(manifestPath); + if (cached && cached.mtimeMs === mtimeMs) return cached.entries; + + try { + const parsed = JSON.parse(await readFile(manifestPath, 'utf-8')) as CatalogManifest; + if (parsed?.version !== CATALOG_MANIFEST_VERSION || typeof parsed.entries !== 'object') { + manifestCache.set(manifestPath, { mtimeMs, entries: null }); + return null; + } + manifestCache.set(manifestPath, { mtimeMs, entries: parsed.entries }); + return parsed.entries; + } catch (error) { + logger.warn( + { + manifest_path: manifestPath, + error: error instanceof Error ? error.message : String(error), + }, + 'Ignoring unreadable connector catalog manifest; falling back to on-demand compilation' + ); + manifestCache.set(manifestPath, { mtimeMs, entries: null }); + return null; + } +} + +/** + * Two-level scan of a catalog directory for connector source files. Shared by + * the runtime loader and the build-time manifest generator so the manifest + * covers exactly the set the runtime would scan. One level deep so primitive + * groupings like `browser/*.ts` are discovered alongside top-level service + * connectors; connectors don't currently nest deeper. + */ +async function collectConnectorSourceFiles(dirPath: string): Promise { + const candidatePaths: string[] = []; + const topEntries = await readdir(dirPath, { withFileTypes: true }); + for (const entry of topEntries.sort((a, b) => a.name.localeCompare(b.name))) { + const entryPath = resolve(dirPath, entry.name); + if (entry.isFile()) { + if (extname(entry.name) !== '.ts' || entry.name.endsWith('.d.ts')) continue; + candidatePaths.push(entryPath); + continue; + } + if (entry.isDirectory()) { + // Skip private / non-connector folders. `__tests__` ships test files that + // import `bun:test`, which esbuild can't resolve; any leading-underscore + // name is by convention not a connector grouping. + if (entry.name === '__tests__' || entry.name.startsWith('_')) continue; + try { + const subEntries = await readdir(entryPath, { withFileTypes: true }); + for (const sub of subEntries.sort((a, b) => a.name.localeCompare(b.name))) { + if (!sub.isFile()) continue; + if (extname(sub.name) !== '.ts' || sub.name.endsWith('.d.ts')) continue; + candidatePaths.push(resolve(entryPath, sub.name)); + } + } catch { + // Subdir unreadable — skip silently; don't fail the whole catalog. + } + } + } + return candidatePaths; +} + +// Manifest hit → precomputed metadata (may be null = known non-connector, skip). +// Manifest miss → compile + extract on demand (custom catalog dirs, or a bundled +// file the manifest doesn't cover). Preserves the dynamic runtime path. +async function resolveConnectorCatalogMetadata( + filePath: string, + dirPath: string, + manifest: CatalogManifest['entries'] | null +): Promise { + if (manifest) { + const rel = toPosixRelative(dirPath, filePath); + if (Object.hasOwn(manifest, rel)) return manifest[rel]; + } + return extractConnectorCatalogMetadata(filePath); +} + export async function listCatalogConnectorDefinitions( rawUris?: string ): Promise { @@ -260,40 +383,11 @@ export async function listCatalogConnectorDefinitions( continue; } - // Scan one level deep so primitive groupings like `browser/*.ts` are - // discovered alongside top-level service connectors. Two-level scan - // keeps the loader bounded — connectors don't currently nest deeper. - const candidatePaths: string[] = []; - const topEntries = await readdir(dirPath, { withFileTypes: true }); - for (const entry of topEntries.sort((a, b) => a.name.localeCompare(b.name))) { - const entryPath = resolve(dirPath, entry.name); - if (entry.isFile()) { - if (extname(entry.name) !== '.ts' || entry.name.endsWith('.d.ts')) continue; - candidatePaths.push(entryPath); - continue; - } - if (entry.isDirectory()) { - // Skip private / non-connector folders. `__tests__` ships test files - // that import `bun:test`, which esbuild can't resolve and which - // surface as catalog-cold-scan warnings; any leading-underscore name - // is by convention not a connector grouping. - if (entry.name === '__tests__' || entry.name.startsWith('_')) continue; - try { - const subEntries = await readdir(entryPath, { withFileTypes: true }); - for (const sub of subEntries.sort((a, b) => a.name.localeCompare(b.name))) { - if (!sub.isFile()) continue; - if (extname(sub.name) !== '.ts' || sub.name.endsWith('.d.ts')) continue; - candidatePaths.push(resolve(entryPath, sub.name)); - } - } catch { - // Subdir unreadable — skip silently. Top-level scan still produced - // whatever it could; don't fail the whole catalog over one bad dir. - } - } - } + const manifest = await loadCatalogManifest(dirPath); + const candidatePaths = await collectConnectorSourceFiles(dirPath); for (const filePath of candidatePaths) { - const metadata = await extractConnectorCatalogMetadata(filePath); + const metadata = await resolveConnectorCatalogMetadata(filePath, dirPath, manifest); if (!metadata || seenKeys.has(metadata.key)) continue; seenKeys.add(metadata.key); @@ -325,6 +419,20 @@ export async function listCatalogConnectorDefinitions( return definitions.sort((a, b) => a.name.localeCompare(b.name)); } +/** + * Build-time: compile every bundled connector once and capture its metadata so + * the runtime serves the catalog without on-demand compilation. Non-connector + * files are stored as `null` so they aren't recompiled at runtime. Invoked by + * `scripts/build-connector-catalog-manifest.ts`. + */ +export async function generateCatalogManifest(dirPath: string): Promise { + const entries: CatalogManifest['entries'] = {}; + for (const filePath of await collectConnectorSourceFiles(dirPath)) { + entries[toPosixRelative(dirPath, filePath)] = await extractConnectorCatalogMetadata(filePath); + } + return { version: CATALOG_MANIFEST_VERSION, entries }; +} + /** A bundled connector that runs on a device worker rather than the cloud fleet. */ export interface BundledDeviceConnector { /** Connector key, e.g. `apple.screen_time`. */ diff --git a/packages/server/src/utils/queue-helpers.ts b/packages/server/src/utils/queue-helpers.ts index 394f20f23..c396a9847 100644 --- a/packages/server/src/utils/queue-helpers.ts +++ b/packages/server/src/utils/queue-helpers.ts @@ -53,11 +53,35 @@ export interface WatcherRunPayload { // ============================================ /** - * Create a pending sync run for a feed. - * - * @param feedId Feed ID - * @param env Environment bindings - * @returns Run ID if created, null if skipped + * A feed whose connector can't be resolved to runnable code is an orphan: the + * connector was archived/uninstalled, or its version was registered without + * compiled code and has no bundled source to compile on demand. Soft-delete it + * in-place so it stops appearing in CheckDueFeeds — a warn + return null would + * repeat at the same cadence forever (~1/min), and a large enough orphan set + * fills the CheckDueFeeds LIMIT 100 and starves legitimate feeds. Operators + * recover by registering connector code and clearing `deleted_at`. + */ +async function softDeleteOrphanFeed( + sql: DbClient, + feedId: number, + feed: { connector_key: string; organization_id: string }, + reason: string +): Promise { + await sql` + UPDATE feeds + SET deleted_at = current_timestamp + WHERE id = ${feedId} + `; + logger.warn( + { feedId, connector_key: feed.connector_key, organization_id: feed.organization_id }, + `[queue] Soft-deleted orphan feed — ${reason}` + ); +} + +/** + * Create a pending sync run for a feed (within an existing client/tx). Returns + * the run ID, or null if skipped — a duplicate active sync run, or an + * unresolvable orphan feed that was soft-deleted (see softDeleteOrphanFeed). */ async function createSyncRunWithClient(sql: DbClient, feedId: number): Promise { // Check if there's already a pending/running run for this feed @@ -109,29 +133,14 @@ async function createSyncRunWithClient(sql: DbClient, feedId: number): Promise