diff --git a/.github/workflows/build-pgvector-embedded.yml b/.github/workflows/build-pgvector-embedded.yml
new file mode 100644
index 000000000..ad515911a
--- /dev/null
+++ b/.github/workflows/build-pgvector-embedded.yml
@@ -0,0 +1,127 @@
+name: Build pgvector-embedded artifacts
+
+# Rebuilds the prebuilt pgvector artifacts vendored in
+# packages/pgvector-embedded/prebuilt/{platform}/ for every platform
+# embedded-postgres supports. Run on demand (bump pgvector / PG major) or when
+# the build script changes. embedded-postgres ships vanilla PG 18 with no
+# pgvector, so each cell compiles pgvector against a same-major PostgreSQL (the
+# extension ABI is stable within a major) and uploads the result; a final job
+# opens a PR with the regenerated artifacts.
+#
+# Windows (windows-x64) is intentionally NOT built yet — pgvector on Windows
+# needs an MSVC/nmake build the bash script doesn't cover. Follow-up.
+
+on:
+  workflow_dispatch:
+    inputs:
+      pgvector_version:
+        description: pgvector git tag to build
+        default: v0.8.1
+  push:
+    paths:
+      - packages/pgvector-embedded/scripts/build.sh
+      - .github/workflows/build-pgvector-embedded.yml
+
+# Least privilege: every job gets a read-only token by default. Only open-pr
+# is granted write scopes (job-level `permissions` below), so the build matrix,
+# which runs package installs and the build script, cannot push or open PRs.
+permissions:
+  contents: read
+
+env:
+  PGVECTOR_VERSION: ${{ inputs.pgvector_version || 'v0.8.1' }}
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - { platform: darwin-arm64, runner: macos-15 }
+          # macos-15-intel is GitHub's x86_64 macOS runner. The old macos-13
+          # (Intel) image was retired Dec 2025, so that label queues forever.
+          # macos-15-intel runs Intel on macOS 15 (available until ~Aug 2027,
+          # the last x86_64 macOS image GitHub will offer).
+          - { platform: darwin-x64, runner: macos-15-intel }
+          - { platform: linux-x64, runner: ubuntu-latest }
+          - { platform: linux-arm64, runner: ubuntu-24.04-arm }
+    runs-on: ${{ matrix.runner }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - name: Install PostgreSQL 18 dev (macOS)
+        if: runner.os == 'macOS'
+        run: |
+          brew install postgresql@18
+          echo "PG_CONFIG=$(brew --prefix postgresql@18)/bin/pg_config" >> "$GITHUB_ENV"
+
+      - name: Install PostgreSQL 18 dev (Linux)
+        if: runner.os == 'Linux'
+        run: |
+          # Use the official pgdg setup script rather than a hand-rolled
+          # curl|gpg --dearmor pipe: the latter intermittently fails on GitHub
+          # runners with "gpg: cannot open '/dev/tty'". The script adds the
+          # repo + key non-interactively.
+          sudo apt-get update
+          sudo apt-get install -y postgresql-common
+          sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y
+          sudo apt-get install -y postgresql-server-dev-18 build-essential
+          echo "PG_CONFIG=/usr/lib/postgresql/18/bin/pg_config" >> "$GITHUB_ENV"
+
+      - name: Build pgvector artifact
+        run: |
+          chmod +x packages/pgvector-embedded/scripts/build.sh
+          PLATFORM=${{ matrix.platform }} packages/pgvector-embedded/scripts/build.sh
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: pgvector-${{ matrix.platform }}
+          path: packages/pgvector-embedded/prebuilt/${{ matrix.platform }}
+          if-no-files-found: error
+
+  open-pr:
+    needs: build
+    runs-on: ubuntu-latest
+    # Pushes the regenerated-artifacts branch and opens the PR.
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/download-artifact@v4
+        with:
+          path: /tmp/pgvector-artifacts
+
+      - name: Stage regenerated artifacts
+        run: |
+          for dir in /tmp/pgvector-artifacts/pgvector-*; do
+            platform="$(basename "$dir" | sed 's/^pgvector-//')"
+            dest="packages/pgvector-embedded/prebuilt/${platform}"
+            rm -rf "$dest"
+            mkdir -p "$dest"
+            cp -R "$dir/." "$dest/"
+          done
+
+      - name: Open PR
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          branch="chore/pgvector-embedded-${PGVECTOR_VERSION}-${GITHUB_RUN_ID}"
+          git switch -c "$branch"
+          git add packages/pgvector-embedded/prebuilt
+          if git diff --cached --quiet; then
+            echo "No artifact changes — nothing to PR."
+            exit 0
+          fi
+          git commit -m "chore(pgvector-embedded): rebuild prebuilt artifacts (${PGVECTOR_VERSION})"
+          git push -u origin "$branch"
+          gh pr create \
+            --base main \
+            --head "$branch" \
+            --title "chore(pgvector-embedded): rebuild prebuilt artifacts (${PGVECTOR_VERSION})" \
+            --body "Regenerated by the build-pgvector-embedded workflow (pgvector ${PGVECTOR_VERSION}, PG 18)."
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 614255caf..20da65a3c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -202,11 +202,18 @@ jobs:
         # the `import` condition and fails to load if dist is absent —
         # which transitively breaks every integration file whose import
         # graph touches `queue-helpers` / `worker-api` / `connector-catalog`.
+        #
+        # pgvector-embedded must build too: the test backend
+        # (src/__tests__/setup/embedded-postgres-backend.ts) imports
+        # `@lobu/pgvector-embedded`, so vite resolves its `import` condition
+        # (./dist/index.js) at transform time even though the embedded backend
+        # is never started when DATABASE_URL points at the external Postgres.
         run: |
           cd packages/core && bun run build && cd ../..
           cd packages/connector-sdk && bun run build && cd ../..
           cd packages/embeddings && bun run build && cd ../..
           cd packages/connector-worker && bun run build && cd ../..
+          cd packages/pgvector-embedded && bun run build && cd ../..
- name: Verify Postgres health (fail fast if pgvector setup is broken) run: | diff --git a/AGENTS.md b/AGENTS.md index cef6cecf7..826467355 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -106,6 +106,8 @@ Rules for agents: - `packages/cli/src/commands/_lib/connector-run-cmd.ts` — `browser-mirror`, `devtools-active-port`, `executeCompiledConnector`. - `packages/cli/src/commands/_lib/apply/desired-state.ts` — `yaml` (loaded only on YAML inputs). - `packages/cli/src/commands/memory/_lib/browser-auth-cmd.ts` — `decryptChromeCookiesMacOS`, `playwright/chromium`. + - `packages/server/src/server.ts` — `./embedded-runtime` is statically imported, but `./server-lifecycle` is lazy: its transitive imports read env at module-eval, and the embedded branch only finalises DATABASE_URL during `main()`. Loading the lifecycle eagerly would snapshot a stale env. + - `packages/server/src/embedded-runtime.ts` — `embedded-postgres` + `@lobu/pgvector-embedded` (and `postgres`) are lazy so the external/prod path (postgres:// URL) never resolves or loads the ~145MB embedded-Postgres binary even though it sits in node_modules. Only reached when DATABASE_URL is a path/file://. - **Tests** — `await import(...)` inside `beforeAll` / `beforeEach` / `test()` is allowed (load after `vi.mock(...)`); this is the vitest pattern, not a production exemption. ## Scope discipline and branch hygiene diff --git a/Makefile b/Makefile index 6ab7e2c17..5c08d5010 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ typecheck: # Build all TypeScript packages in dependency order build-packages: @echo "📦 Building all TypeScript packages..." 
- @for pkg in core connector-sdk agent-worker openclaw-plugin embeddings connector-worker promptfoo-provider; do \ + @for pkg in core pgvector-embedded connector-sdk agent-worker openclaw-plugin embeddings connector-worker promptfoo-provider; do \ echo " 📦 Building packages/$$pkg..."; \ ( cd packages/$$pkg && bun run build ) || exit $$?; \ done diff --git a/bun.lock b/bun.lock index bb6827d58..53cb994b7 100644 --- a/bun.lock +++ b/bun.lock @@ -17,7 +17,7 @@ }, "packages/agent-worker": { "name": "@lobu/worker", - "version": "7.2.0", + "version": "8.0.0", "bin": { "lobu-worker": "./dist/index.js", }, @@ -44,7 +44,7 @@ }, "packages/cli": { "name": "@lobu/cli", - "version": "7.2.0", + "version": "8.0.0", "bin": { "lobu": "bin/lobu.js", }, @@ -60,8 +60,6 @@ "@chat-adapter/telegram": "4.26.0", "@chat-adapter/whatsapp": "4.26.0", "@clack/prompts": "^1.2.0", - "@electric-sql/pglite": "^0.4.3", - "@electric-sql/pglite-socket": "^0.1.3", "@hono/node-server": "^1.13.7", "@hono/zod-openapi": "^1.2.1", "@inquirer/prompts": "^7.10.1", @@ -69,6 +67,7 @@ "@lobu/connector-worker": "workspace:*", "@lobu/core": "workspace:*", "@lobu/embeddings": "workspace:*", + "@lobu/pgvector-embedded": "workspace:*", "@lobu/worker": "workspace:*", "@mariozechner/pi-ai": "^0.51.6", "@modelcontextprotocol/sdk": "^1.27.1", @@ -91,6 +90,7 @@ "commander": "^14.0.1", "cron-parser": "^5.5.0", "dotenv": "^16.4.5", + "embedded-postgres": "18.3.0-beta.17", "esbuild": "^0.27.0", "handlebars": "^4.7.9", "hono": "^4.10.4", @@ -129,7 +129,7 @@ }, "packages/connector-sdk": { "name": "@lobu/connector-sdk", - "version": "7.2.0", + "version": "8.0.0", "dependencies": { "@lobu/core": "workspace:*", "@sinclair/typebox": "^0.34.41", @@ -153,7 +153,7 @@ }, "packages/connector-worker": { "name": "@lobu/connector-worker", - "version": "7.2.0", + "version": "8.0.0", "bin": { "connector-worker": "./dist/bin.js", }, @@ -176,7 +176,7 @@ }, "packages/connectors": { "name": "@lobu/connectors", - "version": "7.2.0", + 
"version": "8.0.0", "dependencies": { "@lobu/connector-sdk": "workspace:*", "baileys": "7.0.0-rc.9", @@ -190,7 +190,7 @@ }, "packages/core": { "name": "@lobu/core", - "version": "7.2.0", + "version": "8.0.0", "dependencies": { "@opentelemetry/api": "^1.9.0", "@opentelemetry/exporter-trace-otlp-grpc": "^0.57.0", @@ -209,7 +209,7 @@ }, "packages/embeddings": { "name": "@lobu/embeddings", - "version": "7.2.0", + "version": "8.0.0", "dependencies": { "@hono/node-server": "^1.13.7", "@xenova/transformers": "^2.17.2", @@ -240,7 +240,10 @@ }, "packages/openclaw-plugin": { "name": "@lobu/openclaw-plugin", - "version": "7.2.0", + "version": "8.0.0", + "dependencies": { + "@lobu/core": "workspace:*", + }, "devDependencies": { "@types/node": "^20.10.0", "postgres": "^3.4.7", @@ -320,9 +323,17 @@ "vitest": "^2.1.8", }, }, + "packages/pgvector-embedded": { + "name": "@lobu/pgvector-embedded", + "version": "8.0.0", + "devDependencies": { + "@types/node": "20.19.9", + "typescript": "^5.7.2", + }, + }, "packages/promptfoo-provider": { "name": "@lobu/promptfoo-provider", - "version": "0.1.0", + "version": "8.0.0", "devDependencies": { "@types/node": "^20.10.0", "typescript": "^5.3.3", @@ -375,12 +386,11 @@ "zod": "^4.4.0", }, "devDependencies": { - "@electric-sql/pglite": "^0.4.3", - "@electric-sql/pglite-postgis": "^0.0.7", - "@electric-sql/pglite-socket": "^0.1.3", + "@lobu/pgvector-embedded": "workspace:*", "@types/react": "^19.2.14", "@vitest/coverage-v8": "^2.1.8", "dotenv": "^16.4.5", + "embedded-postgres": "18.3.0-beta.17", "esbuild": "^0.27.0", "tsx": "^4.19.2", "typescript": "^5.7.2", @@ -727,9 +737,21 @@ "@electric-sql/pglite": ["@electric-sql/pglite@0.4.4", "", {}, "sha512-g/6CWAJ4XOkObWCWAQ2IReZD8VvsDy3poRHSKvpRR2F96F8WJ3HVbjpso3gN7l0q6QPPgvxSSpl/qo5k8a7mkQ=="], - "@electric-sql/pglite-postgis": ["@electric-sql/pglite-postgis@0.0.7", "", { "peerDependencies": { "@electric-sql/pglite": "0.4.5" } }, 
"sha512-qWbWD3WG+PL0gBQ2YzvK0RI7C0i+VPxEpGWb8cT99x1x7kujCtfTCL2gRxFWmh5rqjoZTyY69HkcXphQH4bTNA=="], + "@embedded-postgres/darwin-arm64": ["@embedded-postgres/darwin-arm64@18.3.0-beta.17", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Pvrej3Xz5flfyVc9mchVfekrKoTJyvPtM3U0vjuXamZkRKmi+inP2zRmnmzYecIVbr7Zhu82xbsCENMXrwMp9Q=="], + + "@embedded-postgres/darwin-x64": ["@embedded-postgres/darwin-x64@18.3.0-beta.17", "", { "os": "darwin", "cpu": "x64" }, "sha512-MVWe+C47pPoMD9LlIWGQkvZ5Xsu3IBo54CYqnIps/Z1byMIUBNc7y/dZ3mfqEwiCbVDVqirG0CU462xnrSEfKA=="], - "@electric-sql/pglite-socket": ["@electric-sql/pglite-socket@0.1.4", "", { "peerDependencies": { "@electric-sql/pglite": "0.4.4" }, "bin": { "pglite-server": "dist/scripts/server.js" } }, "sha512-2LuMFxJPE2FbjWDrWpylWwhZ5uT03rQfBLM7g2AdZmiG3JjC8sbYFkFR4ZeoqVcn22wKzpJTXFV22mQ+s4Oueg=="], + "@embedded-postgres/linux-arm": ["@embedded-postgres/linux-arm@18.3.0-beta.17", "", { "os": "linux", "cpu": "arm" }, "sha512-Y2vw7p80PO/Ko7CDm8CCpStnNfMe+oc11e0WZtqAVRjxO6H0oic/ehULhUsWU3mZm5jq7wQAv37VMzf4JN+SFQ=="], + + "@embedded-postgres/linux-arm64": ["@embedded-postgres/linux-arm64@18.3.0-beta.17", "", { "os": "linux", "cpu": "arm64" }, "sha512-hXp7yHJHYWkdjkgF6As8whEHbdYxhBdmXeLpLTw0aiac0O6+0Cbqk3cOR9U+e49oyIpElHVwZUo6OewquSRhSg=="], + + "@embedded-postgres/linux-ia32": ["@embedded-postgres/linux-ia32@18.3.0-beta.17", "", { "os": "linux", "cpu": "ia32" }, "sha512-hVUOM+7QxkzAIdN3gewfVwL1EpJIx+0qUiNTD8cMqRtaZyU87e4AFIvBS0UiDJ9xzMTVWr/X24wePtbvIbkopg=="], + + "@embedded-postgres/linux-ppc64": ["@embedded-postgres/linux-ppc64@18.3.0-beta.17", "", { "os": "linux", "cpu": "ppc64" }, "sha512-p3/u4YUqSdE2CKUBlC84JGZCi6RnE1fyeLPIIVy2DJUiKtExR5rE3OpDJcVoN40uecYGL+nR4qFocGzDwG1TBw=="], + + "@embedded-postgres/linux-x64": ["@embedded-postgres/linux-x64@18.3.0-beta.17", "", { "os": "linux", "cpu": "x64" }, "sha512-8orSD6NNopSLtjqir4dWQBrj+g8j1eJjWd9mB60A3xbWMzIBIPQpzT7XzbacW9YFSl/DejOLnRXfff+Wr13Tgw=="], + + "@embedded-postgres/windows-x64": 
["@embedded-postgres/windows-x64@18.3.0-beta.17", "", { "os": "win32", "cpu": "x64" }, "sha512-kDC5aBsmhWDjeQjj2V4g+Bk+pMeDU27b7l0rBbaKgtt2gsNmCB34ULg/5cqs2kqUKSk/tiGMHKCNE+zQZ+s4rg=="], "@emnapi/core": ["@emnapi/core@1.10.0", "", { "dependencies": { "@emnapi/wasi-threads": "1.2.1", "tslib": "^2.4.0" } }, "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw=="], @@ -1059,6 +1081,8 @@ "@lobu/owletto": ["@lobu/owletto@workspace:packages/owletto"], + "@lobu/pgvector-embedded": ["@lobu/pgvector-embedded@workspace:packages/pgvector-embedded"], + "@lobu/promptfoo-provider": ["@lobu/promptfoo-provider@workspace:packages/promptfoo-provider"], "@lobu/server": ["@lobu/server@workspace:packages/server"], @@ -2037,6 +2061,8 @@ "async": ["async@3.2.6", "", {}, "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA=="], + "async-exit-hook": ["async-exit-hook@2.0.1", "", {}, "sha512-NW2cX8m1Q7KPA7a5M2ULQeZ2wR5qI5PAbw5L0UOMxdioVk9PMZ0h1TmyZEkPYrCvYjDlFICusOu1dlEKAAeXBw=="], + "async-lock": ["async-lock@1.4.1", "", {}, "sha512-Az2ZTpuytrtqENulXwO3GGv1Bztugx6TT37NIo7imr/Qo0gsYiGtSdBa2B6fsXhTpVZDNfu1Qn3pk531e3q+nQ=="], "async-mutex": ["async-mutex@0.5.0", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-1A94B18jkJ3DYq284ohPxoXbfTA5HsQ7/Mf4DEhcyLx3Bz27Rh59iScbB6EPiP+B+joue6YCxcMXSbFC1tZKwA=="], @@ -2375,6 +2401,8 @@ "elen": ["elen@1.0.10", "", {}, "sha512-ZL799/V/kzxYJ6Wlfktreq6qQWfGc3VkGUQJW5lZQ8/MhsQiKTAwERPfhEwIsV2movRGe2DfV7H2MjRw76Z7Wg=="], + "embedded-postgres": ["embedded-postgres@18.3.0-beta.17", "", { "dependencies": { "async-exit-hook": "^2.0.1", "pg": "^8.7.3" }, "optionalDependencies": { "@embedded-postgres/darwin-arm64": "^18.3.0-beta.17", "@embedded-postgres/darwin-x64": "^18.3.0-beta.17", "@embedded-postgres/linux-arm": "^18.3.0-beta.17", "@embedded-postgres/linux-arm64": "^18.3.0-beta.17", "@embedded-postgres/linux-ia32": "^18.3.0-beta.17", "@embedded-postgres/linux-ppc64": 
"^18.3.0-beta.17", "@embedded-postgres/linux-x64": "^18.3.0-beta.17", "@embedded-postgres/windows-x64": "^18.3.0-beta.17" } }, "sha512-1biFWyuPVtAV5S9RBgcr4PGuAdNL9WhnNZVQ5Arp3gsB24Ci9X9s/8Z7RFYFSc6tJWcj9kzF55YcDAcr3jLUbQ=="], + "emoji-regex": ["emoji-regex@10.6.0", "", {}, "sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A=="], "emojilib": ["emojilib@2.4.0", "", {}, "sha512-5U0rVMU5Y2n2+ykNLQqMoqklN9ICBT/KsvC1Gz6vqHbz2AXXGkG+Pm5rMWk/8Vjrr/mY9985Hi8DYzn1F09Nyw=="], diff --git a/db/migrations/20260520120000_geo_earthdistance.sql b/db/migrations/20260520120000_geo_earthdistance.sql new file mode 100644 index 000000000..ca46f5682 --- /dev/null +++ b/db/migrations/20260520120000_geo_earthdistance.sql @@ -0,0 +1,100 @@ +-- migrate:up + +-- Reverse-geocoding (BASIC tier): nearest GeoNames place from a lat/lng, +-- backing `geo_lookup()` for event geo enrichment (see +-- packages/server/src/utils/geo-enrichment.ts). +-- +-- Built on core-contrib `cube` + `earthdistance` (great-circle distance over +-- 3D earth points) — NOT PostGIS. earthdistance ships in every standard +-- Postgres (embedded, RDS, Homebrew, …), so this works on every backend with +-- zero extra binaries. The accurate tiers (street address, venue/POI) are +-- filled in on-device by the apple.photos connector via Apple frameworks; see +-- the place_name note in packages/connectors/src/apple_photos.ts. +-- +-- The schema was lost in the 2026-05-19 migration squash; this re-adds it +-- unconditionally (the tables are empty until seeded by scripts/seed-geo-data.sh, +-- and geo-enrichment self-disables while geo_places is empty). + +CREATE EXTENSION IF NOT EXISTS cube; +CREATE EXTENSION IF NOT EXISTS earthdistance; + +CREATE TABLE IF NOT EXISTS public.geo_countries ( + code text PRIMARY KEY, + code3 text, + name text NOT NULL, + continent text +); + +CREATE TABLE IF NOT EXISTS public.geo_admin1 ( + code text PRIMARY KEY, -- "." e.g. 
"IT.07" + country_code text NOT NULL, + name text NOT NULL, + ascii_name text +); + +CREATE TABLE IF NOT EXISTS public.geo_places ( + geonameid bigint PRIMARY KEY, + name text NOT NULL, + ascii_name text, + alt_names text, + latitude double precision NOT NULL, + longitude double precision NOT NULL, + feature_class text, + feature_code text, + country_code text, + admin1_code text, + admin2_code text, + population bigint, + elevation_m integer, + timezone text +); + +-- GiST KNN index over the 3D earth point. ll_to_earth is IMMUTABLE, so it is +-- index-able; `<->` (cube distance) drives nearest-neighbour scans. +CREATE INDEX IF NOT EXISTS geo_places_earth_idx + ON public.geo_places USING gist (ll_to_earth(latitude, longitude)); + +-- Nearest place to (lat,lng). KNN orders by chord distance (`<->`, index-backed); +-- chord distance is monotonic with great-circle, so the nearest by `<->` is the +-- nearest by earth_distance, which we report in km. Country/admin1 names are +-- joined from their reference tables. +CREATE OR REPLACE FUNCTION public.geo_lookup(in_lat double precision, in_lng double precision) +RETURNS TABLE ( + place_name text, + place_id bigint, + country_code text, + country_name text, + admin1_code text, + admin1_name text, + timezone text, + population bigint, + distance_km double precision +) +LANGUAGE sql STABLE AS $$ + SELECT + p.name AS place_name, + p.geonameid AS place_id, + p.country_code, + c.name AS country_name, + p.admin1_code, + a.name AS admin1_name, + p.timezone, + p.population, + earth_distance( + ll_to_earth(p.latitude, p.longitude), + ll_to_earth(in_lat, in_lng) + ) / 1000.0 AS distance_km + FROM public.geo_places p + LEFT JOIN public.geo_countries c ON c.code = p.country_code + LEFT JOIN public.geo_admin1 a + ON a.code = p.country_code || '.' 
|| p.admin1_code + ORDER BY ll_to_earth(p.latitude, p.longitude) <-> ll_to_earth(in_lat, in_lng) + LIMIT 1 +$$; + +-- migrate:down + +DROP FUNCTION IF EXISTS public.geo_lookup(double precision, double precision); +DROP TABLE IF EXISTS public.geo_places; +DROP TABLE IF EXISTS public.geo_admin1; +DROP TABLE IF EXISTS public.geo_countries; diff --git a/docker/app/Dockerfile b/docker/app/Dockerfile index 62368629c..c5e60a2a6 100644 --- a/docker/app/Dockerfile +++ b/docker/app/Dockerfile @@ -31,6 +31,11 @@ COPY packages/openclaw-plugin/package.json packages/openclaw-plugin/ COPY packages/connectors/package.json packages/connectors/ COPY packages/embeddings/package.json packages/embeddings/ COPY packages/connector-worker/package.json packages/connector-worker/ +# server depends on @lobu/pgvector-embedded (workspace:*); its manifest must be +# present for bun install to resolve the workspace member and for tsc to find +# its types. Prod never loads it at runtime (external Postgres) — it's pruned +# from the runtime node_modules below. +COPY packages/pgvector-embedded/package.json packages/pgvector-embedded/ # packages/owletto is a private git submodule. The glob lets the copy succeed # when the submodule is not initialized (external contributors / backend-only builds). COPY packages/owletto/package.jso[n] packages/owletto/ @@ -64,6 +69,7 @@ COPY packages/openclaw-plugin/ packages/openclaw-plugin/ COPY packages/connectors/ packages/connectors/ COPY packages/embeddings/ packages/embeddings/ COPY packages/connector-worker/ packages/connector-worker/ +COPY packages/pgvector-embedded/ packages/pgvector-embedded/ COPY packages/server/ packages/server/ # Copy the private frontend submodule when present. 
COPY packages/owletto packages/owletto @@ -76,6 +82,7 @@ RUN cd packages/core && bunx tsc \ && cd ../connector-sdk && bunx tsc \ && cd ../embeddings && bunx tsc \ && cd ../connector-worker && bunx tsc \ + && cd ../pgvector-embedded && bunx tsc \ && cd ../openclaw-plugin && bun run build # Type check @@ -86,6 +93,13 @@ RUN cd packages/server && bunx tsc --noEmit # for the full rationale. RUN cd packages/server && bun run build:server +# Prune the embedded-Postgres runtime (~145MB platform binary) before it reaches +# the runtime image. Prod always uses an external DATABASE_URL (postgres://), so +# server.ts never loads embedded-postgres or the pgvector injector — both are +# reached only via await import() on the file:// path. The builder needs them +# for the type-check + bundle above, so we drop them only afterward. +RUN rm -rf node_modules/embedded-postgres node_modules/@embedded-postgres + # Build frontend static assets (production only) ARG VITE_API_URL= ENV VITE_API_URL=$VITE_API_URL diff --git a/packages/cli/package.json b/packages/cli/package.json index 86e8f63d7..63d4bf264 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -36,8 +36,6 @@ "@chat-adapter/telegram": "4.26.0", "@chat-adapter/whatsapp": "4.26.0", "@clack/prompts": "^1.2.0", - "@electric-sql/pglite": "^0.4.3", - "@electric-sql/pglite-socket": "^0.1.3", "@hono/node-server": "^1.13.7", "@hono/zod-openapi": "^1.2.1", "@inquirer/prompts": "^7.10.1", @@ -45,6 +43,7 @@ "@lobu/connector-worker": "workspace:*", "@lobu/core": "workspace:*", "@lobu/embeddings": "workspace:*", + "@lobu/pgvector-embedded": "workspace:*", "@lobu/worker": "workspace:*", "@mariozechner/pi-ai": "^0.51.6", "@modelcontextprotocol/sdk": "^1.27.1", @@ -68,6 +67,7 @@ "commander": "^14.0.1", "cron-parser": "^5.5.0", "dotenv": "^16.4.5", + "embedded-postgres": "18.3.0-beta.17", "esbuild": "^0.27.0", "handlebars": "^4.7.9", "hono": "^4.10.4", diff --git a/packages/cli/scripts/build.cjs 
b/packages/cli/scripts/build.cjs index 976703992..15526db95 100644 --- a/packages/cli/scripts/build.cjs +++ b/packages/cli/scripts/build.cjs @@ -44,7 +44,7 @@ copyDirIfExists("../../db/migrations", "dist/db/migrations"); // what ships inside the CLI tarball. CI's publish flow builds the bundles // (`build:server`) before this script runs; if they're missing locally, run // `bun run --filter '@lobu/server' build:server` first. -for (const bundleName of ["server.bundle.mjs", "start-local.bundle.mjs"]) { +for (const bundleName of ["server.bundle.mjs"]) { const bundleSrc = `../server/dist/${bundleName}`; const bundleDest = `dist/${bundleName}`; if (fs.existsSync(bundleSrc)) { diff --git a/packages/cli/src/__tests__/dev.test.ts b/packages/cli/src/__tests__/dev.test.ts index 736cd25e0..5edd6eaa7 100644 --- a/packages/cli/src/__tests__/dev.test.ts +++ b/packages/cli/src/__tests__/dev.test.ts @@ -40,22 +40,18 @@ describe("lobu run backend bundle resolution", () => { } }); - test("finds backend bundles copied to the CLI dist root", () => { + test("finds the server bundle copied to the CLI dist root", () => { const root = mkdtempSync(join(tmpdir(), "lobu-cli-dist-")); tempDirs.push(root); const commandsDir = join(root, "dist", "commands"); mkdirSync(commandsDir, { recursive: true }); - const postgresBundlePath = join(root, "dist", "server.bundle.mjs"); - const pgliteBundlePath = join(root, "dist", "start-local.bundle.mjs"); - writeFileSync(postgresBundlePath, "// bundle placeholder\n"); - writeFileSync(pgliteBundlePath, "// bundle placeholder\n"); + // Single bundle for both backends — it self-selects on DATABASE_URL. 
+ const bundlePath = join(root, "dist", "server.bundle.mjs"); + writeFileSync(bundlePath, "// bundle placeholder\n"); - expect(resolveBackendBundle(commandsDir, "postgres")).toBe( - postgresBundlePath - ); - expect(resolveBackendBundle(commandsDir, "pglite")).toBe(pgliteBundlePath); + expect(resolveBackendBundle(commandsDir)).toBe(bundlePath); }); test("CLI package declares runtime deps for the embedded server bundle", () => { @@ -97,17 +93,16 @@ describe("lobu run backend bundle resolution", () => { // These are server build/dev deps today, but the embedded runtime imports // them at startup, while compiling bundled connector code, or while running - // local PGlite. - for (const name of [ - "dotenv", - "esbuild", - "vite", - "@electric-sql/pglite", - "@electric-sql/pglite-socket", - ]) { + // the local embedded Postgres. + for (const name of ["dotenv", "esbuild", "vite", "embedded-postgres"]) { expect(cliRuntimeDeps[name]).toBeDefined(); } + // @lobu/pgvector-embedded is the one @lobu/* dep the bundle keeps EXTERNAL + // (it ships prebuilt binary assets esbuild can't inline), so the published + // CLI must declare it explicitly. + expect(cliRuntimeDeps["@lobu/pgvector-embedded"]).toBeDefined(); + // Compiled connector code deliberately leaves these native/browser deps // external, so npx-installed CLIs must provide them too. 
for (const name of ["playwright", "sharp", "jimp"]) { @@ -253,6 +248,6 @@ describe("lobu run backend bundle resolution", () => { "utf8" ); expect(buildScript).toContain('copyDirIfExists("../../db/migrations"'); - expect(buildScript).toContain('"start-local.bundle.mjs"'); + expect(buildScript).toContain('"server.bundle.mjs"'); }); }); diff --git a/packages/cli/src/commands/dev.ts b/packages/cli/src/commands/dev.ts index 8c3248d5a..e3d64ace0 100644 --- a/packages/cli/src/commands/dev.ts +++ b/packages/cli/src/commands/dev.ts @@ -3,6 +3,7 @@ import { existsSync, readFileSync } from "node:fs"; import { readFile } from "node:fs/promises"; import { createRequire } from "node:module"; import { createServer } from "node:net"; +import { homedir } from "node:os"; import { dirname, join, resolve } from "node:path"; import { fileURLToPath } from "node:url"; import chalk from "chalk"; @@ -53,6 +54,34 @@ export function isSharedDatabaseUrl(databaseUrl: string): boolean { } } +/** + * `DATABASE_URL` is the single backend selector: + * - a `postgres://` / `postgresql://` URL (matched case-insensitively, after trimming) → connect to an external Postgres + * - anything else (a filesystem path, optionally `file:`-prefixed) → boot a + * local embedded Postgres with its data under `{path}/.lobu/pgdata` + * + * `lobu run` defaults the path to the user's home dir when nothing is set, so a + * bare `lobu run` still works (data at `~/.lobu/pgdata`). The runtime itself + * always receives an explicit path — the default is injected here, at the CLI + * frontend, exactly like the menubar app supplies its own path. + */ +export function isExternalDatabaseUrl(databaseUrl: string): boolean { + return /^postgres(ql)?:\/\//i.test(databaseUrl.trim()); +} + +/** + * Resolve the embedded data ROOT from a path-form DATABASE_URL: strips a + * leading `file:` (or `file://`) prefix, expands a leading `~` (bare `~` or `~/…` only) to the home dir, and returns an absolute path. The Postgres cluster lives at + * `{root}/.lobu/pgdata` (see embedded-runtime.ts). 
+ */ +export function resolveEmbeddedDataRoot(databaseUrl: string): string { + let p = databaseUrl.trim().replace(/^file:(\/\/)?/i, ""); + if (p === "~" || p.startsWith("~/")) { + p = join(homedir(), p.slice(1)); + } + return resolve(p); +} + /** * Decide whether `lobu run` must refuse to boot because the EFFECTIVE * DATABASE_URL points at a shared/non-local DB the project never opted into. @@ -81,15 +110,13 @@ export function shouldRefuseSharedDatabaseUrl(input: { return isSharedDatabaseUrl(effective); } -type BackendBundleKind = "postgres" | "pglite"; - /** * `lobu run` — start the embedded Lobu stack. * - * By default this uses the bundled local PGlite runtime, so a freshly - * scaffolded project can boot without Docker or a separate Postgres. When - * DATABASE_URL is set in .env or the shell, it instead starts the external - * Postgres runtime against that database. + * `DATABASE_URL` selects the backend (see `isExternalDatabaseUrl`): a + * `postgres://` URL connects to an external Postgres; a filesystem path boots a + * local embedded Postgres rooted there. Unset defaults to an embedded DB at + * `~/.lobu/pgdata`. */ export async function devCommand( cwd: string, @@ -119,16 +146,24 @@ export async function devCommand( ...envVars, ...(process.env as Record), }; - const effectiveDatabaseUrl = mergedEnv.DATABASE_URL?.trim(); - const hasDatabaseUrl = Boolean(effectiveDatabaseUrl); - - // Refuse to boot against a shared/non-local DATABASE_URL that came from the - // parent shell rather than the project's own .env. A common footgun: + // DATABASE_URL is the backend selector: a postgres:// URL → external; any + // other value (a path) → embedded PG rooted there; unset → embedded at the + // user's home dir. The CLI injects the path default so the runtime always + // receives an explicit DATABASE_URL. + const databaseUrlRaw = mergedEnv.DATABASE_URL?.trim() ?? ""; + const mode: "external" | "embedded" = + databaseUrlRaw && isExternalDatabaseUrl(databaseUrlRaw) + ? 
"external" + : "embedded"; + + // Refuse to boot against a shared/non-local external DATABASE_URL inherited + // from the parent shell rather than the project's own .env. A common footgun: // "local lobu run" silently writes into prod / a teammate's tailnet DB. - // Project pinning in .env is explicit consent. + // Embedded paths are always local (not URLs), so this only fires for external + // postgres:// URLs; project pinning in .env is explicit consent. if ( shouldRefuseSharedDatabaseUrl({ - effectiveDatabaseUrl, + effectiveDatabaseUrl: databaseUrlRaw, projectEnvDatabaseUrl: envVars.DATABASE_URL, unsafeSharedDb: options.unsafeSharedDb, }) @@ -136,7 +171,7 @@ export async function devCommand( spinner.fail("DATABASE_URL inherited from shell points at a shared DB"); console.error( chalk.red( - `\n Refusing to start: DATABASE_URL=${redactUrl(mergedEnv.DATABASE_URL!)}\n` + `\n Refusing to start: DATABASE_URL=${redactUrl(databaseUrlRaw)}\n` ) ); console.error( @@ -164,7 +199,9 @@ export async function devCommand( ) ); console.error( - chalk.dim(" • unset DATABASE_URL in this shell (PGlite will be used)") + chalk.dim( + " • set DATABASE_URL to a directory path for a local embedded Postgres" + ) ); console.error( chalk.dim( @@ -173,16 +210,23 @@ export async function devCommand( ); process.exit(1); } - const bundleKind: BackendBundleKind = hasDatabaseUrl ? "postgres" : "pglite"; - const bundlePath = resolveBackendBundle(undefined, bundleKind); + + // Embedded: resolve the data root and pass it through as the explicit + // DATABASE_URL path the single server bundle reads. A path-form DATABASE_URL + // wins; otherwise default to the user's home dir. The bundle puts the cluster + // at /.lobu/pgdata. + let embeddedDataRoot: string | null = null; + if (mode === "embedded") { + embeddedDataRoot = resolveEmbeddedDataRoot(databaseUrlRaw || "~"); + mergedEnv.DATABASE_URL = embeddedDataRoot; + } + + // One bundle for both backends — it self-selects on DATABASE_URL. 
+ const bundlePath = resolveBackendBundle(); if (!bundlePath) { spinner.fail("server bundle not found"); - const bundleName = - bundleKind === "pglite" ? "start-local.bundle.mjs" : "server.bundle.mjs"; console.error( - chalk.red( - `\n Could not locate the embedded server bundle (${bundleName}).\n` - ) + chalk.red("\n Could not locate the server bundle (server.bundle.mjs).\n") ); console.error( chalk.dim( @@ -200,9 +244,9 @@ export async function devCommand( } spinner.succeed( - hasDatabaseUrl + mode === "external" ? "Environment ready" - : "Environment ready (using local PGlite)" + : "Environment ready (local embedded Postgres)" ); const portRaw = @@ -238,15 +282,15 @@ export async function devCommand( await printPreviewInstructions(cwd); console.log(chalk.cyan(`\n Starting Lobu...\n`)); console.log(chalk.dim(` bundle: ${bundlePath}`)); - if (hasDatabaseUrl) { + if (mode === "external") { console.log( chalk.dim(` database: ${redactUrl(mergedEnv.DATABASE_URL!)}`) ); } else { - console.log(chalk.dim(" database: local PGlite")); + console.log(chalk.dim(" database: local embedded Postgres")); console.log( chalk.dim( - ` data: ${mergedEnv.LOBU_DATA_DIR || "~/.lobu/data"}` + ` data: ${join(embeddedDataRoot!, ".lobu", "pgdata")}` ) ); } @@ -307,7 +351,7 @@ export async function devCommand( // click the URL straight from their terminal and land logged in. Also // persists the session as the `local` CLI context so `lobu chat -c local` // works without a separate `lobu login`. - void announceLocalSignIn(gatewayUrl, !hasDatabaseUrl); + void announceLocalSignIn(gatewayUrl, mode === "embedded"); // Forward Ctrl+C to the child so it can clean up its own subprocess workers // before the parent exits. SIGKILL after a timeout in case it wedges. 
@@ -373,13 +417,11 @@ export function findEnclosingMonorepoRoot(startDir: string): string | null { } export function resolveBackendBundle( - startDir = dirname(fileURLToPath(import.meta.url)), - kind: BackendBundleKind = "postgres" + startDir = dirname(fileURLToPath(import.meta.url)) ): string | null { const here = startDir; const require_ = createRequire(import.meta.url); - const bundleName = - kind === "pglite" ? "start-local.bundle.mjs" : "server.bundle.mjs"; + const bundleName = "server.bundle.mjs"; for (const bundled of [ join(here, bundleName), @@ -388,12 +430,10 @@ export function resolveBackendBundle( if (existsSync(bundled)) return bundled; } - if (kind === "postgres") { - try { - return require_.resolve("@lobu/server/dist/server.bundle.mjs"); - } catch { - // not installed as a dep - } + try { + return require_.resolve("@lobu/server/dist/server.bundle.mjs"); + } catch { + // not installed as a dep } let cur = here; @@ -420,17 +460,17 @@ export function resolveBackendBundle( */ async function announceLocalSignIn( gatewayUrl: string, - pgLite: boolean + embedded: boolean ): Promise { // Poll briefly so the announce lands AFTER the server's own startup // banner without racing it. const reachable = await waitForServerReachable(gatewayUrl); if (!reachable) return; - // Only the embedded PGlite path seeds the bootstrap user → /local-init - // will refuse on a Postgres-backed deployment with real signups. Skip - // the network call entirely in that case to keep the banner quiet. - if (!pgLite) return; + // Only the embedded path seeds the bootstrap user → /local-init will refuse + // on an external-Postgres deployment with real signups. Skip the network + // call entirely in that case to keep the banner quiet. 
+ if (!embedded) return; try { const res = await fetch(`${gatewayUrl}/api/local-init`, { diff --git a/packages/cli/src/commands/init.ts b/packages/cli/src/commands/init.ts index 0a6e89578..6d8e28a14 100644 --- a/packages/cli/src/commands/init.ts +++ b/packages/cli/src/commands/init.ts @@ -288,6 +288,52 @@ export async function initCommand( }) ); + // Database: local embedded Postgres (zero-config) or an existing one. The + // chosen value is written verbatim to DATABASE_URL — `file://.` boots an + // isolated embedded PG under ./.lobu/pgdata; a postgres:// URL connects out. + const databaseChoice = await promptOrDefault({ + flag: undefined, + useDefaults, + defaultValue: "embedded", + validate: (v: string) => + v === "embedded" || + v === "external" || + /^(postgres(ql)?|file):/i.test(v.trim()) + ? true + : "Must be 'embedded', 'external', or a postgres:// / file:// URL", + prompt: () => + select({ + message: "Database?", + choices: [ + { + name: "Local embedded Postgres — zero-config, data in ./.lobu (recommended)", + value: "embedded", + }, + { name: "Connect to an existing Postgres", value: "external" }, + ], + default: "embedded", + }), + }); + + let databaseUrl: string; + if (databaseChoice === "external") { + databaseUrl = ( + await input({ + message: "Postgres connection URL?", + validate: (v: string) => + /^postgres(ql)?:\/\//i.test(v.trim()) + ? true + : "Must be a postgres:// URL", + }) + ).trim(); + } else if (/^(postgres(ql)?|file):/i.test(databaseChoice.trim())) { + // A URL passed directly (e.g. via --yes with an explicit value). 
+ databaseUrl = databaseChoice.trim(); + } else { + // embedded → isolated per-project Postgres at ./.lobu/pgdata + databaseUrl = "file://."; + } + const publicGatewayUrl = await promptOrDefault({ flag: options.publicUrl, useDefaults, @@ -575,6 +621,7 @@ export async function initCommand( ENCRYPTION_KEY: answers.encryptionKey, GATEWAY_PORT: gatewayPort, WORKER_PROXY_PORT: workerProxyPort, + DATABASE_URL: databaseUrl, WORKER_ALLOWED_DOMAINS: answers.allowedDomains, WORKER_DISALLOWED_DOMAINS: answers.disallowedDomains, }; @@ -678,14 +725,12 @@ export async function initCommand( if (!here) { console.log(chalk.cyan(` ${n++}. cd ${projectName}`)); } - console.log( - chalk.cyan( - ` ${n++}. Start the local stack: lobu run (uses PGlite by default)` - ) - ); + console.log(chalk.cyan(` ${n++}. Start the local stack: lobu run`)); console.log( chalk.dim( - " Optional: set DATABASE_URL in .env to use external Postgres instead." + databaseUrl.startsWith("file:") + ? " Database: local embedded Postgres (./.lobu). Edit DATABASE_URL in .env to connect to an external one." + : " Database: external Postgres (DATABASE_URL in .env)." ) ); if (lobuUrl) { diff --git a/packages/cli/src/internal/credentials.ts b/packages/cli/src/internal/credentials.ts index 7768a0d8e..d44ad1109 100644 --- a/packages/cli/src/internal/credentials.ts +++ b/packages/cli/src/internal/credentials.ts @@ -127,7 +127,7 @@ export async function clearCredentials(contextName?: string): Promise { * * For loopback contexts with no stored creds, transparently POSTs * /api/local-init to mint a fresh Better Auth session for the - * embedded-PGlite bootstrap user — `lobu chat -c local` works without a + * embedded bootstrap user — `lobu chat -c local` works without a * prior `lobu login`. 
*/ export async function getToken(contextName?: string): Promise { diff --git a/packages/cli/src/templates/.env.tmpl b/packages/cli/src/templates/.env.tmpl index 22cd48142..ce4e6fed6 100644 --- a/packages/cli/src/templates/.env.tmpl +++ b/packages/cli/src/templates/.env.tmpl @@ -5,19 +5,11 @@ GATEWAY_PORT={{GATEWAY_PORT}} # Auto-picked at scaffold time so co-resident Lobu projects don't collide. WORKER_PROXY_PORT={{WORKER_PROXY_PORT}} -# Project-local data directory for the embedded PGlite database. -# Defaults to `~/.lobu/data` (shared across projects) — overriding here keeps -# each project's DB isolated, which avoids migration collisions like -# `function "prevent_entity_cycles" already exists` when multiple projects -# share one DB. -LOBU_DATA_DIR=./.lobu-data - -# Required external services -# Lobu connects to a user-provided Postgres. Run one yourself -# (managed instance, local docker, brew services, whatever you prefer) -# and point this URL at it. Leave empty to use the embedded PGlite at -# LOBU_DATA_DIR. -DATABASE_URL= +# Database. DATABASE_URL selects the backend: +# file://<dir> → local embedded Postgres (cluster lives under <dir>/.lobu/pgdata) +# postgres://... → connect to an existing Postgres you run yourself +# `file://.` keeps an isolated DB inside this project (./.lobu/pgdata, gitignored).
+DATABASE_URL={{DATABASE_URL}} # Security ENCRYPTION_KEY={{ENCRYPTION_KEY}} diff --git a/packages/connectors/src/apple_photos.ts b/packages/connectors/src/apple_photos.ts index c2ed33abe..974b6f5aa 100644 --- a/packages/connectors/src/apple_photos.ts +++ b/packages/connectors/src/apple_photos.ts @@ -142,6 +142,19 @@ export default class ApplePhotosConnector extends ConnectorRuntime { type: ['string', 'null'], description: 'Reverse-geocoded human-readable place from CLGeocoder when available offline.', + // TODO(geo-enrichment tiers): the gateway only does the BASIC + // tier — nearest city/region/country via the bundled + // `geo_lookup()` (cube+earthdistance over GeoNames; no PostGIS). + // Accurate tiers must be filled in HERE, on-device, because + // they need Apple frameworks the server can't call: + // - street address → CLGeocoder.reverseGeocodeLocation + // - venue / POI ("Joe's Pizza") → MKLocalSearch / + // MKLocalPointsOfInterestRequest near the coordinate, + // or the place name Apple Photos already attached. + // Populate place_name (and a future address/venue field) from + // the device; the gateway leaves them as-is and only backfills + // city/region/country when null. Cloud Places API is the + // cross-platform fallback if geo ever runs off-Mac. 
}, people: { type: 'array', diff --git a/packages/owletto b/packages/owletto index 524ad50c8..872a6ea43 160000 --- a/packages/owletto +++ b/packages/owletto @@ -1 +1 @@ -Subproject commit 524ad50c8132557563745a34685a7a65595f20e9 +Subproject commit 872a6ea43ca201b3ad8b34fa50d26b6c06b3053e diff --git a/packages/pgvector-embedded/package.json b/packages/pgvector-embedded/package.json new file mode 100644 index 000000000..67f6a7f20 --- /dev/null +++ b/packages/pgvector-embedded/package.json @@ -0,0 +1,41 @@ +{ + "name": "@lobu/pgvector-embedded", + "version": "8.0.0", + "license": "BUSL-1.1", + "type": "module", + "description": "Prebuilt pgvector binaries + injector for embedded-postgres (Lobu local runtime)", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "scripts": { + "typecheck": "tsc --noEmit", + "build": "tsc", + "build:binary": "scripts/build.sh" + }, + "exports": { + ".": { + "types": "./dist/index.d.ts", + "bun": "./src/index.ts", + "import": "./dist/index.js", + "require": "./dist/index.js" + } + }, + "engines": { + "node": ">=20" + }, + "publishConfig": { + "access": "public" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/lobu-ai/lobu.git", + "directory": "packages/pgvector-embedded" + }, + "devDependencies": { + "@types/node": "20.19.9", + "typescript": "^5.7.2" + }, + "files": [ + "dist", + "prebuilt" + ] +} diff --git a/packages/pgvector-embedded/prebuilt/darwin-arm64/vector--0.8.1.sql b/packages/pgvector-embedded/prebuilt/darwin-arm64/vector--0.8.1.sql new file mode 100644 index 000000000..7fc36712b --- /dev/null +++ b/packages/pgvector-embedded/prebuilt/darwin-arm64/vector--0.8.1.sql @@ -0,0 +1,918 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION vector" to load this file. 
\quit + +-- vector type + +CREATE TYPE vector; + +CREATE FUNCTION vector_in(cstring, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_out(vector) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_recv(internal, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_send(vector) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE vector ( + INPUT = vector_in, + OUTPUT = vector_out, + TYPMOD_IN = vector_typmod_in, + RECEIVE = vector_recv, + SEND = vector_send, + STORAGE = external +); + +-- vector functions + +CREATE FUNCTION l2_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_dims(vector) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_norm(vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION binary_quantize(vector) RETURNS bit + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION subvector(vector, int, int) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT 
PARALLEL SAFE; + +-- vector private functions + +CREATE FUNCTION vector_add(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_sub(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_mul(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_concat(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_lt(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_le(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_eq(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_ne(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_ge(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_gt(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_cmp(vector, vector) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_l2_squared_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_negative_inner_product(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_spherical_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_accum(double precision[], vector) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT 
PARALLEL SAFE; + +CREATE FUNCTION vector_avg(double precision[]) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_combine(double precision[], double precision[]) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- vector aggregates + +CREATE AGGREGATE avg(vector) ( + SFUNC = vector_accum, + STYPE = double precision[], + FINALFUNC = vector_avg, + COMBINEFUNC = vector_combine, + INITCOND = '{0}', + PARALLEL = SAFE +); + +CREATE AGGREGATE sum(vector) ( + SFUNC = vector_add, + STYPE = vector, + COMBINEFUNC = vector_add, + PARALLEL = SAFE +); + +-- vector cast functions + +CREATE FUNCTION vector(vector, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(integer[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(real[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(double precision[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(numeric[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_float4(vector, integer, boolean) RETURNS real[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- vector casts + +CREATE CAST (vector AS vector) + WITH FUNCTION vector(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (vector AS real[]) + WITH FUNCTION vector_to_float4(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (integer[] AS vector) + WITH FUNCTION array_to_vector(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS vector) + WITH FUNCTION array_to_vector(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST 
(double precision[] AS vector) + WITH FUNCTION array_to_vector(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS vector) + WITH FUNCTION array_to_vector(numeric[], integer, boolean) AS ASSIGNMENT; + +-- vector operators + +CREATE OPERATOR <-> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR + ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_add, + COMMUTATOR = + +); + +CREATE OPERATOR - ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_sub +); + +CREATE OPERATOR * ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_mul, + COMMUTATOR = * +); + +CREATE OPERATOR || ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_concat +); + +CREATE OPERATOR < ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = 
vector, RIGHTARG = vector, PROCEDURE = vector_gt, + COMMUTATOR = < , NEGATOR = <= , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- access methods + +CREATE FUNCTION ivfflathandler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD ivfflat TYPE INDEX HANDLER ivfflathandler; + +COMMENT ON ACCESS METHOD ivfflat IS 'ivfflat index access method'; + +CREATE FUNCTION hnswhandler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD hnsw TYPE INDEX HANDLER hnswhandler; + +COMMENT ON ACCESS METHOD hnsw IS 'hnsw index access method'; + +-- access method private functions + +CREATE FUNCTION ivfflat_halfvec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION ivfflat_bit_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_halfvec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_bit_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_sparsevec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +-- vector opclasses + +CREATE OPERATOR CLASS vector_ops + DEFAULT FOR TYPE vector USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 vector_cmp(vector, vector); + +CREATE OPERATOR CLASS vector_l2_ops + DEFAULT FOR TYPE vector USING ivfflat AS + OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_l2_squared_distance(vector, vector), + FUNCTION 3 l2_distance(vector, vector); + +CREATE OPERATOR CLASS vector_ip_ops + FOR TYPE vector USING ivfflat AS + OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 3 vector_spherical_distance(vector, vector), + FUNCTION 4 vector_norm(vector); + +CREATE OPERATOR CLASS vector_cosine_ops + FOR TYPE vector USING ivfflat AS 
+ OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 2 vector_norm(vector), + FUNCTION 3 vector_spherical_distance(vector, vector), + FUNCTION 4 vector_norm(vector); + +CREATE OPERATOR CLASS vector_l2_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_l2_squared_distance(vector, vector); + +CREATE OPERATOR CLASS vector_ip_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector); + +CREATE OPERATOR CLASS vector_cosine_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 2 vector_norm(vector); + +CREATE OPERATOR CLASS vector_l1_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <+> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(vector, vector); + +-- halfvec type + +CREATE TYPE halfvec; + +CREATE FUNCTION halfvec_in(cstring, oid, integer) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_out(halfvec) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_recv(internal, oid, integer) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_send(halfvec) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE halfvec ( + INPUT = halfvec_in, + OUTPUT = halfvec_out, + TYPMOD_IN = halfvec_typmod_in, + RECEIVE = halfvec_recv, + SEND = halfvec_send, + STORAGE = external +); + +-- halfvec functions + +CREATE FUNCTION l2_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 
'halfvec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_l1_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_dims(halfvec) RETURNS integer + AS 'MODULE_PATHNAME', 'halfvec_vector_dims' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_norm(halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME', 'halfvec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION binary_quantize(halfvec) RETURNS bit + AS 'MODULE_PATHNAME', 'halfvec_binary_quantize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION subvector(halfvec, int, int) RETURNS halfvec + AS 'MODULE_PATHNAME', 'halfvec_subvector' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec private functions + +CREATE FUNCTION halfvec_add(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_sub(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_mul(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_concat(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_lt(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_le(halfvec, halfvec) 
RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_eq(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_ne(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_ge(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_gt(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_cmp(halfvec, halfvec) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_l2_squared_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_negative_inner_product(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_spherical_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_accum(double precision[], halfvec) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_avg(double precision[]) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_combine(double precision[], double precision[]) RETURNS double precision[] + AS 'MODULE_PATHNAME', 'vector_combine' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec aggregates + +CREATE AGGREGATE avg(halfvec) ( + SFUNC = halfvec_accum, + STYPE = double precision[], + FINALFUNC = halfvec_avg, + COMBINEFUNC = halfvec_combine, + INITCOND = '{0}', + PARALLEL = SAFE +); + +CREATE AGGREGATE sum(halfvec) ( + SFUNC = halfvec_add, + STYPE = halfvec, + COMBINEFUNC = halfvec_add, + PARALLEL = SAFE +); + +-- halfvec cast 
functions + +CREATE FUNCTION halfvec(halfvec, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_vector(halfvec, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_halfvec(vector, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(integer[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(real[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(double precision[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(numeric[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_float4(halfvec, integer, boolean) RETURNS real[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec casts + +CREATE CAST (halfvec AS halfvec) + WITH FUNCTION halfvec(halfvec, integer, boolean) AS IMPLICIT; + +CREATE CAST (halfvec AS vector) + WITH FUNCTION halfvec_to_vector(halfvec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (vector AS halfvec) + WITH FUNCTION vector_to_halfvec(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (halfvec AS real[]) + WITH FUNCTION halfvec_to_float4(halfvec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (integer[] AS halfvec) + WITH FUNCTION array_to_halfvec(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS halfvec) + WITH FUNCTION array_to_halfvec(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS halfvec) + WITH FUNCTION array_to_halfvec(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST 
(numeric[] AS halfvec) + WITH FUNCTION array_to_halfvec(numeric[], integer, boolean) AS ASSIGNMENT; + +-- halfvec operators + +CREATE OPERATOR <-> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR + ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_add, + COMMUTATOR = + +); + +CREATE OPERATOR - ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_sub +); + +CREATE OPERATOR * ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_mul, + COMMUTATOR = * +); + +CREATE OPERATOR || ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_concat +); + +CREATE OPERATOR < ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_gt, + COMMUTATOR = < , NEGATOR = <= , + 
RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- halfvec opclasses + +CREATE OPERATOR CLASS halfvec_ops + DEFAULT FOR TYPE halfvec USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 halfvec_cmp(halfvec, halfvec); + +CREATE OPERATOR CLASS halfvec_l2_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec), + FUNCTION 3 l2_distance(halfvec, halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_ip_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), + FUNCTION 4 l2_norm(halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_cosine_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <=> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 2 l2_norm(halfvec), + FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), + FUNCTION 4 l2_norm(halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_l2_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_ip_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_cosine_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <=> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 2 l2_norm(halfvec), + 
FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_l1_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +-- bit functions + +CREATE FUNCTION hamming_distance(bit, bit) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION jaccard_distance(bit, bit) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- bit operators + +CREATE OPERATOR <~> ( + LEFTARG = bit, RIGHTARG = bit, PROCEDURE = hamming_distance, + COMMUTATOR = '<~>' +); + +CREATE OPERATOR <%> ( + LEFTARG = bit, RIGHTARG = bit, PROCEDURE = jaccard_distance, + COMMUTATOR = '<%>' +); + +-- bit opclasses + +CREATE OPERATOR CLASS bit_hamming_ops + FOR TYPE bit USING ivfflat AS + OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 hamming_distance(bit, bit), + FUNCTION 3 hamming_distance(bit, bit), + FUNCTION 5 ivfflat_bit_support(internal); + +CREATE OPERATOR CLASS bit_hamming_ops + FOR TYPE bit USING hnsw AS + OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 hamming_distance(bit, bit), + FUNCTION 3 hnsw_bit_support(internal); + +CREATE OPERATOR CLASS bit_jaccard_ops + FOR TYPE bit USING hnsw AS + OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 jaccard_distance(bit, bit), + FUNCTION 3 hnsw_bit_support(internal); + +--- sparsevec type + +CREATE TYPE sparsevec; + +CREATE FUNCTION sparsevec_in(cstring, oid, integer) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_out(sparsevec) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_recv(internal, oid, integer) RETURNS sparsevec + AS 
'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_send(sparsevec) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE sparsevec ( + INPUT = sparsevec_in, + OUTPUT = sparsevec_out, + TYPMOD_IN = sparsevec_typmod_in, + RECEIVE = sparsevec_recv, + SEND = sparsevec_send, + STORAGE = external +); + +-- sparsevec functions + +CREATE FUNCTION l2_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l1_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_norm(sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(sparsevec) RETURNS sparsevec + AS 'MODULE_PATHNAME', 'sparsevec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- sparsevec private functions + +CREATE FUNCTION sparsevec_lt(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_le(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_eq(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_ne(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_ge(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' 
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_gt(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_cmp(sparsevec, sparsevec) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_l2_squared_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_negative_inner_product(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- sparsevec cast functions + +CREATE FUNCTION sparsevec(sparsevec, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_sparsevec(vector, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_to_halfvec(sparsevec, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(integer[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(real[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(double precision[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(numeric[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; 
+ +-- sparsevec casts + +CREATE CAST (sparsevec AS sparsevec) + WITH FUNCTION sparsevec(sparsevec, integer, boolean) AS IMPLICIT; + +CREATE CAST (sparsevec AS vector) + WITH FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (vector AS sparsevec) + WITH FUNCTION vector_to_sparsevec(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (sparsevec AS halfvec) + WITH FUNCTION sparsevec_to_halfvec(sparsevec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (halfvec AS sparsevec) + WITH FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) AS IMPLICIT; + +CREATE CAST (integer[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(numeric[], integer, boolean) AS ASSIGNMENT; + +-- sparsevec operators + +CREATE OPERATOR <-> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR < ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + 
LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_gt, + COMMUTATOR = < , NEGATOR = <= , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- sparsevec opclasses + +CREATE OPERATOR CLASS sparsevec_ops + DEFAULT FOR TYPE sparsevec USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 sparsevec_cmp(sparsevec, sparsevec); + +CREATE OPERATOR CLASS sparsevec_l2_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_ip_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_cosine_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <=> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec), + FUNCTION 2 l2_norm(sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_l1_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <+> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); diff --git 
a/packages/pgvector-embedded/prebuilt/darwin-arm64/vector.control b/packages/pgvector-embedded/prebuilt/darwin-arm64/vector.control new file mode 100644 index 000000000..2ad02286a --- /dev/null +++ b/packages/pgvector-embedded/prebuilt/darwin-arm64/vector.control @@ -0,0 +1,4 @@ +comment = 'vector data type and ivfflat and hnsw access methods' +default_version = '0.8.1' +module_pathname = '$libdir/vector' +relocatable = true diff --git a/packages/pgvector-embedded/prebuilt/darwin-arm64/vector.dylib b/packages/pgvector-embedded/prebuilt/darwin-arm64/vector.dylib new file mode 100644 index 000000000..7c7d67ca8 Binary files /dev/null and b/packages/pgvector-embedded/prebuilt/darwin-arm64/vector.dylib differ diff --git a/packages/pgvector-embedded/prebuilt/darwin-x64/vector--0.8.1.sql b/packages/pgvector-embedded/prebuilt/darwin-x64/vector--0.8.1.sql new file mode 100644 index 000000000..7fc36712b --- /dev/null +++ b/packages/pgvector-embedded/prebuilt/darwin-x64/vector--0.8.1.sql @@ -0,0 +1,918 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION vector" to load this file. 
\quit + +-- vector type + +CREATE TYPE vector; + +CREATE FUNCTION vector_in(cstring, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_out(vector) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_recv(internal, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_send(vector) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE vector ( + INPUT = vector_in, + OUTPUT = vector_out, + TYPMOD_IN = vector_typmod_in, + RECEIVE = vector_recv, + SEND = vector_send, + STORAGE = external +); + +-- vector functions + +CREATE FUNCTION l2_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_dims(vector) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_norm(vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION binary_quantize(vector) RETURNS bit + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION subvector(vector, int, int) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT 
PARALLEL SAFE; + +-- vector private functions + +CREATE FUNCTION vector_add(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_sub(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_mul(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_concat(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_lt(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_le(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_eq(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_ne(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_ge(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_gt(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_cmp(vector, vector) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_l2_squared_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_negative_inner_product(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_spherical_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_accum(double precision[], vector) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT 
PARALLEL SAFE; + +CREATE FUNCTION vector_avg(double precision[]) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_combine(double precision[], double precision[]) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- vector aggregates + +CREATE AGGREGATE avg(vector) ( + SFUNC = vector_accum, + STYPE = double precision[], + FINALFUNC = vector_avg, + COMBINEFUNC = vector_combine, + INITCOND = '{0}', + PARALLEL = SAFE +); + +CREATE AGGREGATE sum(vector) ( + SFUNC = vector_add, + STYPE = vector, + COMBINEFUNC = vector_add, + PARALLEL = SAFE +); + +-- vector cast functions + +CREATE FUNCTION vector(vector, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(integer[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(real[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(double precision[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(numeric[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_float4(vector, integer, boolean) RETURNS real[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- vector casts + +CREATE CAST (vector AS vector) + WITH FUNCTION vector(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (vector AS real[]) + WITH FUNCTION vector_to_float4(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (integer[] AS vector) + WITH FUNCTION array_to_vector(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS vector) + WITH FUNCTION array_to_vector(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST 
(double precision[] AS vector) + WITH FUNCTION array_to_vector(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS vector) + WITH FUNCTION array_to_vector(numeric[], integer, boolean) AS ASSIGNMENT; + +-- vector operators + +CREATE OPERATOR <-> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR + ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_add, + COMMUTATOR = + +); + +CREATE OPERATOR - ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_sub +); + +CREATE OPERATOR * ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_mul, + COMMUTATOR = * +); + +CREATE OPERATOR || ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_concat +); + +CREATE OPERATOR < ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = 
vector, RIGHTARG = vector, PROCEDURE = vector_gt, + COMMUTATOR = < , NEGATOR = <= , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- access methods + +CREATE FUNCTION ivfflathandler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD ivfflat TYPE INDEX HANDLER ivfflathandler; + +COMMENT ON ACCESS METHOD ivfflat IS 'ivfflat index access method'; + +CREATE FUNCTION hnswhandler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD hnsw TYPE INDEX HANDLER hnswhandler; + +COMMENT ON ACCESS METHOD hnsw IS 'hnsw index access method'; + +-- access method private functions + +CREATE FUNCTION ivfflat_halfvec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION ivfflat_bit_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_halfvec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_bit_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_sparsevec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +-- vector opclasses + +CREATE OPERATOR CLASS vector_ops + DEFAULT FOR TYPE vector USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 vector_cmp(vector, vector); + +CREATE OPERATOR CLASS vector_l2_ops + DEFAULT FOR TYPE vector USING ivfflat AS + OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_l2_squared_distance(vector, vector), + FUNCTION 3 l2_distance(vector, vector); + +CREATE OPERATOR CLASS vector_ip_ops + FOR TYPE vector USING ivfflat AS + OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 3 vector_spherical_distance(vector, vector), + FUNCTION 4 vector_norm(vector); + +CREATE OPERATOR CLASS vector_cosine_ops + FOR TYPE vector USING ivfflat AS 
+ OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 2 vector_norm(vector), + FUNCTION 3 vector_spherical_distance(vector, vector), + FUNCTION 4 vector_norm(vector); + +CREATE OPERATOR CLASS vector_l2_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_l2_squared_distance(vector, vector); + +CREATE OPERATOR CLASS vector_ip_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector); + +CREATE OPERATOR CLASS vector_cosine_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 2 vector_norm(vector); + +CREATE OPERATOR CLASS vector_l1_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <+> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(vector, vector); + +-- halfvec type + +CREATE TYPE halfvec; + +CREATE FUNCTION halfvec_in(cstring, oid, integer) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_out(halfvec) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_recv(internal, oid, integer) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_send(halfvec) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE halfvec ( + INPUT = halfvec_in, + OUTPUT = halfvec_out, + TYPMOD_IN = halfvec_typmod_in, + RECEIVE = halfvec_recv, + SEND = halfvec_send, + STORAGE = external +); + +-- halfvec functions + +CREATE FUNCTION l2_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 
'halfvec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_l1_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_dims(halfvec) RETURNS integer + AS 'MODULE_PATHNAME', 'halfvec_vector_dims' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_norm(halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME', 'halfvec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION binary_quantize(halfvec) RETURNS bit + AS 'MODULE_PATHNAME', 'halfvec_binary_quantize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION subvector(halfvec, int, int) RETURNS halfvec + AS 'MODULE_PATHNAME', 'halfvec_subvector' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec private functions + +CREATE FUNCTION halfvec_add(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_sub(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_mul(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_concat(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_lt(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_le(halfvec, halfvec) 
RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_eq(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_ne(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_ge(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_gt(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_cmp(halfvec, halfvec) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_l2_squared_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_negative_inner_product(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_spherical_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_accum(double precision[], halfvec) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_avg(double precision[]) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_combine(double precision[], double precision[]) RETURNS double precision[] + AS 'MODULE_PATHNAME', 'vector_combine' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec aggregates + +CREATE AGGREGATE avg(halfvec) ( + SFUNC = halfvec_accum, + STYPE = double precision[], + FINALFUNC = halfvec_avg, + COMBINEFUNC = halfvec_combine, + INITCOND = '{0}', + PARALLEL = SAFE +); + +CREATE AGGREGATE sum(halfvec) ( + SFUNC = halfvec_add, + STYPE = halfvec, + COMBINEFUNC = halfvec_add, + PARALLEL = SAFE +); + +-- halfvec cast 
functions + +CREATE FUNCTION halfvec(halfvec, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_vector(halfvec, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_halfvec(vector, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(integer[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(real[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(double precision[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(numeric[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_float4(halfvec, integer, boolean) RETURNS real[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec casts + +CREATE CAST (halfvec AS halfvec) + WITH FUNCTION halfvec(halfvec, integer, boolean) AS IMPLICIT; + +CREATE CAST (halfvec AS vector) + WITH FUNCTION halfvec_to_vector(halfvec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (vector AS halfvec) + WITH FUNCTION vector_to_halfvec(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (halfvec AS real[]) + WITH FUNCTION halfvec_to_float4(halfvec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (integer[] AS halfvec) + WITH FUNCTION array_to_halfvec(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS halfvec) + WITH FUNCTION array_to_halfvec(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS halfvec) + WITH FUNCTION array_to_halfvec(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST 
(numeric[] AS halfvec) + WITH FUNCTION array_to_halfvec(numeric[], integer, boolean) AS ASSIGNMENT; + +-- halfvec operators + +CREATE OPERATOR <-> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR + ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_add, + COMMUTATOR = + +); + +CREATE OPERATOR - ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_sub +); + +CREATE OPERATOR * ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_mul, + COMMUTATOR = * +); + +CREATE OPERATOR || ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_concat +); + +CREATE OPERATOR < ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_gt, + COMMUTATOR = < , NEGATOR = <= , + 
RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- halfvec opclasses + +CREATE OPERATOR CLASS halfvec_ops + DEFAULT FOR TYPE halfvec USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 halfvec_cmp(halfvec, halfvec); + +CREATE OPERATOR CLASS halfvec_l2_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec), + FUNCTION 3 l2_distance(halfvec, halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_ip_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), + FUNCTION 4 l2_norm(halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_cosine_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <=> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 2 l2_norm(halfvec), + FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), + FUNCTION 4 l2_norm(halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_l2_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_ip_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_cosine_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <=> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 2 l2_norm(halfvec), + 
FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_l1_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +-- bit functions + +CREATE FUNCTION hamming_distance(bit, bit) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION jaccard_distance(bit, bit) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- bit operators + +CREATE OPERATOR <~> ( + LEFTARG = bit, RIGHTARG = bit, PROCEDURE = hamming_distance, + COMMUTATOR = '<~>' +); + +CREATE OPERATOR <%> ( + LEFTARG = bit, RIGHTARG = bit, PROCEDURE = jaccard_distance, + COMMUTATOR = '<%>' +); + +-- bit opclasses + +CREATE OPERATOR CLASS bit_hamming_ops + FOR TYPE bit USING ivfflat AS + OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 hamming_distance(bit, bit), + FUNCTION 3 hamming_distance(bit, bit), + FUNCTION 5 ivfflat_bit_support(internal); + +CREATE OPERATOR CLASS bit_hamming_ops + FOR TYPE bit USING hnsw AS + OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 hamming_distance(bit, bit), + FUNCTION 3 hnsw_bit_support(internal); + +CREATE OPERATOR CLASS bit_jaccard_ops + FOR TYPE bit USING hnsw AS + OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 jaccard_distance(bit, bit), + FUNCTION 3 hnsw_bit_support(internal); + +--- sparsevec type + +CREATE TYPE sparsevec; + +CREATE FUNCTION sparsevec_in(cstring, oid, integer) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_out(sparsevec) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_recv(internal, oid, integer) RETURNS sparsevec + AS 
'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_send(sparsevec) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE sparsevec ( + INPUT = sparsevec_in, + OUTPUT = sparsevec_out, + TYPMOD_IN = sparsevec_typmod_in, + RECEIVE = sparsevec_recv, + SEND = sparsevec_send, + STORAGE = external +); + +-- sparsevec functions + +CREATE FUNCTION l2_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l1_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_norm(sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(sparsevec) RETURNS sparsevec + AS 'MODULE_PATHNAME', 'sparsevec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- sparsevec private functions + +CREATE FUNCTION sparsevec_lt(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_le(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_eq(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_ne(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_ge(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' 
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_gt(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_cmp(sparsevec, sparsevec) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_l2_squared_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_negative_inner_product(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- sparsevec cast functions + +CREATE FUNCTION sparsevec(sparsevec, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_sparsevec(vector, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_to_halfvec(sparsevec, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(integer[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(real[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(double precision[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(numeric[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; 
+ +-- sparsevec casts + +CREATE CAST (sparsevec AS sparsevec) + WITH FUNCTION sparsevec(sparsevec, integer, boolean) AS IMPLICIT; + +CREATE CAST (sparsevec AS vector) + WITH FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (vector AS sparsevec) + WITH FUNCTION vector_to_sparsevec(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (sparsevec AS halfvec) + WITH FUNCTION sparsevec_to_halfvec(sparsevec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (halfvec AS sparsevec) + WITH FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) AS IMPLICIT; + +CREATE CAST (integer[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(numeric[], integer, boolean) AS ASSIGNMENT; + +-- sparsevec operators + +CREATE OPERATOR <-> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR < ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + 
LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_gt, + COMMUTATOR = < , NEGATOR = <= , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- sparsevec opclasses + +CREATE OPERATOR CLASS sparsevec_ops + DEFAULT FOR TYPE sparsevec USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 sparsevec_cmp(sparsevec, sparsevec); + +CREATE OPERATOR CLASS sparsevec_l2_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_ip_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_cosine_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <=> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec), + FUNCTION 2 l2_norm(sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_l1_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <+> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); diff --git 
a/packages/pgvector-embedded/prebuilt/darwin-x64/vector.control b/packages/pgvector-embedded/prebuilt/darwin-x64/vector.control new file mode 100644 index 000000000..2ad02286a --- /dev/null +++ b/packages/pgvector-embedded/prebuilt/darwin-x64/vector.control @@ -0,0 +1,4 @@ +comment = 'vector data type and ivfflat and hnsw access methods' +default_version = '0.8.1' +module_pathname = '$libdir/vector' +relocatable = true diff --git a/packages/pgvector-embedded/prebuilt/darwin-x64/vector.dylib b/packages/pgvector-embedded/prebuilt/darwin-x64/vector.dylib new file mode 100644 index 000000000..8872c3316 Binary files /dev/null and b/packages/pgvector-embedded/prebuilt/darwin-x64/vector.dylib differ diff --git a/packages/pgvector-embedded/prebuilt/linux-arm64/vector--0.8.1.sql b/packages/pgvector-embedded/prebuilt/linux-arm64/vector--0.8.1.sql new file mode 100644 index 000000000..7fc36712b --- /dev/null +++ b/packages/pgvector-embedded/prebuilt/linux-arm64/vector--0.8.1.sql @@ -0,0 +1,918 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION vector" to load this file. 
\quit + +-- vector type + +CREATE TYPE vector; + +CREATE FUNCTION vector_in(cstring, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_out(vector) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_recv(internal, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_send(vector) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE vector ( + INPUT = vector_in, + OUTPUT = vector_out, + TYPMOD_IN = vector_typmod_in, + RECEIVE = vector_recv, + SEND = vector_send, + STORAGE = external +); + +-- vector functions + +CREATE FUNCTION l2_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_dims(vector) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_norm(vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION binary_quantize(vector) RETURNS bit + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION subvector(vector, int, int) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT 
PARALLEL SAFE; + +-- vector private functions + +CREATE FUNCTION vector_add(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_sub(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_mul(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_concat(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_lt(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_le(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_eq(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_ne(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_ge(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_gt(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_cmp(vector, vector) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_l2_squared_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_negative_inner_product(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_spherical_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_accum(double precision[], vector) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT 
PARALLEL SAFE; + +CREATE FUNCTION vector_avg(double precision[]) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_combine(double precision[], double precision[]) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- vector aggregates + +CREATE AGGREGATE avg(vector) ( + SFUNC = vector_accum, + STYPE = double precision[], + FINALFUNC = vector_avg, + COMBINEFUNC = vector_combine, + INITCOND = '{0}', + PARALLEL = SAFE +); + +CREATE AGGREGATE sum(vector) ( + SFUNC = vector_add, + STYPE = vector, + COMBINEFUNC = vector_add, + PARALLEL = SAFE +); + +-- vector cast functions + +CREATE FUNCTION vector(vector, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(integer[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(real[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(double precision[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(numeric[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_float4(vector, integer, boolean) RETURNS real[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- vector casts + +CREATE CAST (vector AS vector) + WITH FUNCTION vector(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (vector AS real[]) + WITH FUNCTION vector_to_float4(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (integer[] AS vector) + WITH FUNCTION array_to_vector(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS vector) + WITH FUNCTION array_to_vector(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST 
(double precision[] AS vector) + WITH FUNCTION array_to_vector(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS vector) + WITH FUNCTION array_to_vector(numeric[], integer, boolean) AS ASSIGNMENT; + +-- vector operators + +CREATE OPERATOR <-> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR + ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_add, + COMMUTATOR = + +); + +CREATE OPERATOR - ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_sub +); + +CREATE OPERATOR * ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_mul, + COMMUTATOR = * +); + +CREATE OPERATOR || ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_concat +); + +CREATE OPERATOR < ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = 
vector, RIGHTARG = vector, PROCEDURE = vector_gt, + COMMUTATOR = < , NEGATOR = <= , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- access methods + +CREATE FUNCTION ivfflathandler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD ivfflat TYPE INDEX HANDLER ivfflathandler; + +COMMENT ON ACCESS METHOD ivfflat IS 'ivfflat index access method'; + +CREATE FUNCTION hnswhandler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD hnsw TYPE INDEX HANDLER hnswhandler; + +COMMENT ON ACCESS METHOD hnsw IS 'hnsw index access method'; + +-- access method private functions + +CREATE FUNCTION ivfflat_halfvec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION ivfflat_bit_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_halfvec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_bit_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_sparsevec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +-- vector opclasses + +CREATE OPERATOR CLASS vector_ops + DEFAULT FOR TYPE vector USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 vector_cmp(vector, vector); + +CREATE OPERATOR CLASS vector_l2_ops + DEFAULT FOR TYPE vector USING ivfflat AS + OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_l2_squared_distance(vector, vector), + FUNCTION 3 l2_distance(vector, vector); + +CREATE OPERATOR CLASS vector_ip_ops + FOR TYPE vector USING ivfflat AS + OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 3 vector_spherical_distance(vector, vector), + FUNCTION 4 vector_norm(vector); + +CREATE OPERATOR CLASS vector_cosine_ops + FOR TYPE vector USING ivfflat AS 
+ OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 2 vector_norm(vector), + FUNCTION 3 vector_spherical_distance(vector, vector), + FUNCTION 4 vector_norm(vector); + +CREATE OPERATOR CLASS vector_l2_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_l2_squared_distance(vector, vector); + +CREATE OPERATOR CLASS vector_ip_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector); + +CREATE OPERATOR CLASS vector_cosine_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 2 vector_norm(vector); + +CREATE OPERATOR CLASS vector_l1_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <+> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(vector, vector); + +-- halfvec type + +CREATE TYPE halfvec; + +CREATE FUNCTION halfvec_in(cstring, oid, integer) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_out(halfvec) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_recv(internal, oid, integer) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_send(halfvec) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE halfvec ( + INPUT = halfvec_in, + OUTPUT = halfvec_out, + TYPMOD_IN = halfvec_typmod_in, + RECEIVE = halfvec_recv, + SEND = halfvec_send, + STORAGE = external +); + +-- halfvec functions + +CREATE FUNCTION l2_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 
'halfvec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_l1_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_dims(halfvec) RETURNS integer + AS 'MODULE_PATHNAME', 'halfvec_vector_dims' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_norm(halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME', 'halfvec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION binary_quantize(halfvec) RETURNS bit + AS 'MODULE_PATHNAME', 'halfvec_binary_quantize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION subvector(halfvec, int, int) RETURNS halfvec + AS 'MODULE_PATHNAME', 'halfvec_subvector' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec private functions + +CREATE FUNCTION halfvec_add(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_sub(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_mul(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_concat(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_lt(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_le(halfvec, halfvec) 
RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_eq(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_ne(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_ge(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_gt(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_cmp(halfvec, halfvec) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_l2_squared_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_negative_inner_product(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_spherical_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_accum(double precision[], halfvec) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_avg(double precision[]) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_combine(double precision[], double precision[]) RETURNS double precision[] + AS 'MODULE_PATHNAME', 'vector_combine' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec aggregates + +CREATE AGGREGATE avg(halfvec) ( + SFUNC = halfvec_accum, + STYPE = double precision[], + FINALFUNC = halfvec_avg, + COMBINEFUNC = halfvec_combine, + INITCOND = '{0}', + PARALLEL = SAFE +); + +CREATE AGGREGATE sum(halfvec) ( + SFUNC = halfvec_add, + STYPE = halfvec, + COMBINEFUNC = halfvec_add, + PARALLEL = SAFE +); + +-- halfvec cast 
functions + +CREATE FUNCTION halfvec(halfvec, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_vector(halfvec, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_halfvec(vector, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(integer[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(real[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(double precision[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(numeric[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_float4(halfvec, integer, boolean) RETURNS real[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec casts + +CREATE CAST (halfvec AS halfvec) + WITH FUNCTION halfvec(halfvec, integer, boolean) AS IMPLICIT; + +CREATE CAST (halfvec AS vector) + WITH FUNCTION halfvec_to_vector(halfvec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (vector AS halfvec) + WITH FUNCTION vector_to_halfvec(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (halfvec AS real[]) + WITH FUNCTION halfvec_to_float4(halfvec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (integer[] AS halfvec) + WITH FUNCTION array_to_halfvec(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS halfvec) + WITH FUNCTION array_to_halfvec(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS halfvec) + WITH FUNCTION array_to_halfvec(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST 
(numeric[] AS halfvec) + WITH FUNCTION array_to_halfvec(numeric[], integer, boolean) AS ASSIGNMENT; + +-- halfvec operators + +CREATE OPERATOR <-> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR + ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_add, + COMMUTATOR = + +); + +CREATE OPERATOR - ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_sub +); + +CREATE OPERATOR * ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_mul, + COMMUTATOR = * +); + +CREATE OPERATOR || ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_concat +); + +CREATE OPERATOR < ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_gt, + COMMUTATOR = < , NEGATOR = <= , + 
RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- halfvec opclasses + +CREATE OPERATOR CLASS halfvec_ops + DEFAULT FOR TYPE halfvec USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 halfvec_cmp(halfvec, halfvec); + +CREATE OPERATOR CLASS halfvec_l2_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec), + FUNCTION 3 l2_distance(halfvec, halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_ip_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), + FUNCTION 4 l2_norm(halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_cosine_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <=> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 2 l2_norm(halfvec), + FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), + FUNCTION 4 l2_norm(halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_l2_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_ip_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_cosine_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <=> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 2 l2_norm(halfvec), + 
FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_l1_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +-- bit functions + +CREATE FUNCTION hamming_distance(bit, bit) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION jaccard_distance(bit, bit) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- bit operators + +CREATE OPERATOR <~> ( + LEFTARG = bit, RIGHTARG = bit, PROCEDURE = hamming_distance, + COMMUTATOR = '<~>' +); + +CREATE OPERATOR <%> ( + LEFTARG = bit, RIGHTARG = bit, PROCEDURE = jaccard_distance, + COMMUTATOR = '<%>' +); + +-- bit opclasses + +CREATE OPERATOR CLASS bit_hamming_ops + FOR TYPE bit USING ivfflat AS + OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 hamming_distance(bit, bit), + FUNCTION 3 hamming_distance(bit, bit), + FUNCTION 5 ivfflat_bit_support(internal); + +CREATE OPERATOR CLASS bit_hamming_ops + FOR TYPE bit USING hnsw AS + OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 hamming_distance(bit, bit), + FUNCTION 3 hnsw_bit_support(internal); + +CREATE OPERATOR CLASS bit_jaccard_ops + FOR TYPE bit USING hnsw AS + OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 jaccard_distance(bit, bit), + FUNCTION 3 hnsw_bit_support(internal); + +--- sparsevec type + +CREATE TYPE sparsevec; + +CREATE FUNCTION sparsevec_in(cstring, oid, integer) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_out(sparsevec) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_recv(internal, oid, integer) RETURNS sparsevec + AS 
'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_send(sparsevec) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE sparsevec ( + INPUT = sparsevec_in, + OUTPUT = sparsevec_out, + TYPMOD_IN = sparsevec_typmod_in, + RECEIVE = sparsevec_recv, + SEND = sparsevec_send, + STORAGE = external +); + +-- sparsevec functions + +CREATE FUNCTION l2_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l1_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_norm(sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(sparsevec) RETURNS sparsevec + AS 'MODULE_PATHNAME', 'sparsevec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- sparsevec private functions + +CREATE FUNCTION sparsevec_lt(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_le(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_eq(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_ne(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_ge(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' 
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_gt(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_cmp(sparsevec, sparsevec) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_l2_squared_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_negative_inner_product(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- sparsevec cast functions + +CREATE FUNCTION sparsevec(sparsevec, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_sparsevec(vector, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_to_halfvec(sparsevec, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(integer[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(real[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(double precision[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(numeric[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; 
+ +-- sparsevec casts + +CREATE CAST (sparsevec AS sparsevec) + WITH FUNCTION sparsevec(sparsevec, integer, boolean) AS IMPLICIT; + +CREATE CAST (sparsevec AS vector) + WITH FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (vector AS sparsevec) + WITH FUNCTION vector_to_sparsevec(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (sparsevec AS halfvec) + WITH FUNCTION sparsevec_to_halfvec(sparsevec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (halfvec AS sparsevec) + WITH FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) AS IMPLICIT; + +CREATE CAST (integer[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(numeric[], integer, boolean) AS ASSIGNMENT; + +-- sparsevec operators + +CREATE OPERATOR <-> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR < ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + 
LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_gt, + COMMUTATOR = < , NEGATOR = <= , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- sparsevec opclasses + +CREATE OPERATOR CLASS sparsevec_ops + DEFAULT FOR TYPE sparsevec USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 sparsevec_cmp(sparsevec, sparsevec); + +CREATE OPERATOR CLASS sparsevec_l2_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_ip_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_cosine_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <=> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec), + FUNCTION 2 l2_norm(sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_l1_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <+> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); diff --git 
a/packages/pgvector-embedded/prebuilt/linux-arm64/vector.control b/packages/pgvector-embedded/prebuilt/linux-arm64/vector.control new file mode 100644 index 000000000..2ad02286a --- /dev/null +++ b/packages/pgvector-embedded/prebuilt/linux-arm64/vector.control @@ -0,0 +1,4 @@ +comment = 'vector data type and ivfflat and hnsw access methods' +default_version = '0.8.1' +module_pathname = '$libdir/vector' +relocatable = true diff --git a/packages/pgvector-embedded/prebuilt/linux-arm64/vector.so b/packages/pgvector-embedded/prebuilt/linux-arm64/vector.so new file mode 100644 index 000000000..5e41c56dd Binary files /dev/null and b/packages/pgvector-embedded/prebuilt/linux-arm64/vector.so differ diff --git a/packages/pgvector-embedded/prebuilt/linux-x64/vector--0.8.1.sql b/packages/pgvector-embedded/prebuilt/linux-x64/vector--0.8.1.sql new file mode 100644 index 000000000..7fc36712b --- /dev/null +++ b/packages/pgvector-embedded/prebuilt/linux-x64/vector--0.8.1.sql @@ -0,0 +1,918 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION vector" to load this file. 
\quit + +-- vector type + +CREATE TYPE vector; + +CREATE FUNCTION vector_in(cstring, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_out(vector) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_recv(internal, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_send(vector) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE vector ( + INPUT = vector_in, + OUTPUT = vector_out, + TYPMOD_IN = vector_typmod_in, + RECEIVE = vector_recv, + SEND = vector_send, + STORAGE = external +); + +-- vector functions + +CREATE FUNCTION l2_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_dims(vector) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_norm(vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION binary_quantize(vector) RETURNS bit + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION subvector(vector, int, int) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT 
PARALLEL SAFE; + +-- vector private functions + +CREATE FUNCTION vector_add(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_sub(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_mul(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_concat(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_lt(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_le(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_eq(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_ne(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_ge(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_gt(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_cmp(vector, vector) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_l2_squared_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_negative_inner_product(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_spherical_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_accum(double precision[], vector) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT 
PARALLEL SAFE; + +CREATE FUNCTION vector_avg(double precision[]) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_combine(double precision[], double precision[]) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- vector aggregates + +CREATE AGGREGATE avg(vector) ( + SFUNC = vector_accum, + STYPE = double precision[], + FINALFUNC = vector_avg, + COMBINEFUNC = vector_combine, + INITCOND = '{0}', + PARALLEL = SAFE +); + +CREATE AGGREGATE sum(vector) ( + SFUNC = vector_add, + STYPE = vector, + COMBINEFUNC = vector_add, + PARALLEL = SAFE +); + +-- vector cast functions + +CREATE FUNCTION vector(vector, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(integer[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(real[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(double precision[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(numeric[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_float4(vector, integer, boolean) RETURNS real[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- vector casts + +CREATE CAST (vector AS vector) + WITH FUNCTION vector(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (vector AS real[]) + WITH FUNCTION vector_to_float4(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (integer[] AS vector) + WITH FUNCTION array_to_vector(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS vector) + WITH FUNCTION array_to_vector(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST 
(double precision[] AS vector) + WITH FUNCTION array_to_vector(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS vector) + WITH FUNCTION array_to_vector(numeric[], integer, boolean) AS ASSIGNMENT; + +-- vector operators + +CREATE OPERATOR <-> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR + ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_add, + COMMUTATOR = + +); + +CREATE OPERATOR - ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_sub +); + +CREATE OPERATOR * ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_mul, + COMMUTATOR = * +); + +CREATE OPERATOR || ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_concat +); + +CREATE OPERATOR < ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = 
vector, RIGHTARG = vector, PROCEDURE = vector_gt, + COMMUTATOR = < , NEGATOR = <= , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- access methods + +CREATE FUNCTION ivfflathandler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD ivfflat TYPE INDEX HANDLER ivfflathandler; + +COMMENT ON ACCESS METHOD ivfflat IS 'ivfflat index access method'; + +CREATE FUNCTION hnswhandler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD hnsw TYPE INDEX HANDLER hnswhandler; + +COMMENT ON ACCESS METHOD hnsw IS 'hnsw index access method'; + +-- access method private functions + +CREATE FUNCTION ivfflat_halfvec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION ivfflat_bit_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_halfvec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_bit_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION hnsw_sparsevec_support(internal) RETURNS internal + AS 'MODULE_PATHNAME' LANGUAGE C; + +-- vector opclasses + +CREATE OPERATOR CLASS vector_ops + DEFAULT FOR TYPE vector USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 vector_cmp(vector, vector); + +CREATE OPERATOR CLASS vector_l2_ops + DEFAULT FOR TYPE vector USING ivfflat AS + OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_l2_squared_distance(vector, vector), + FUNCTION 3 l2_distance(vector, vector); + +CREATE OPERATOR CLASS vector_ip_ops + FOR TYPE vector USING ivfflat AS + OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 3 vector_spherical_distance(vector, vector), + FUNCTION 4 vector_norm(vector); + +CREATE OPERATOR CLASS vector_cosine_ops + FOR TYPE vector USING ivfflat AS 
+ OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 2 vector_norm(vector), + FUNCTION 3 vector_spherical_distance(vector, vector), + FUNCTION 4 vector_norm(vector); + +CREATE OPERATOR CLASS vector_l2_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_l2_squared_distance(vector, vector); + +CREATE OPERATOR CLASS vector_ip_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector); + +CREATE OPERATOR CLASS vector_cosine_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 2 vector_norm(vector); + +CREATE OPERATOR CLASS vector_l1_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <+> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(vector, vector); + +-- halfvec type + +CREATE TYPE halfvec; + +CREATE FUNCTION halfvec_in(cstring, oid, integer) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_out(halfvec) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_recv(internal, oid, integer) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_send(halfvec) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE halfvec ( + INPUT = halfvec_in, + OUTPUT = halfvec_out, + TYPMOD_IN = halfvec_typmod_in, + RECEIVE = halfvec_recv, + SEND = halfvec_send, + STORAGE = external +); + +-- halfvec functions + +CREATE FUNCTION l2_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 
'halfvec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_l1_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_dims(halfvec) RETURNS integer + AS 'MODULE_PATHNAME', 'halfvec_vector_dims' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_norm(halfvec) RETURNS float8 + AS 'MODULE_PATHNAME', 'halfvec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME', 'halfvec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION binary_quantize(halfvec) RETURNS bit + AS 'MODULE_PATHNAME', 'halfvec_binary_quantize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION subvector(halfvec, int, int) RETURNS halfvec + AS 'MODULE_PATHNAME', 'halfvec_subvector' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec private functions + +CREATE FUNCTION halfvec_add(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_sub(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_mul(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_concat(halfvec, halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_lt(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_le(halfvec, halfvec) 
RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_eq(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_ne(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_ge(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_gt(halfvec, halfvec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_cmp(halfvec, halfvec) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_l2_squared_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_negative_inner_product(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_spherical_distance(halfvec, halfvec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_accum(double precision[], halfvec) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_avg(double precision[]) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_combine(double precision[], double precision[]) RETURNS double precision[] + AS 'MODULE_PATHNAME', 'vector_combine' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec aggregates + +CREATE AGGREGATE avg(halfvec) ( + SFUNC = halfvec_accum, + STYPE = double precision[], + FINALFUNC = halfvec_avg, + COMBINEFUNC = halfvec_combine, + INITCOND = '{0}', + PARALLEL = SAFE +); + +CREATE AGGREGATE sum(halfvec) ( + SFUNC = halfvec_add, + STYPE = halfvec, + COMBINEFUNC = halfvec_add, + PARALLEL = SAFE +); + +-- halfvec cast 
functions + +CREATE FUNCTION halfvec(halfvec, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_vector(halfvec, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_halfvec(vector, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(integer[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(real[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(double precision[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_halfvec(numeric[], integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_float4(halfvec, integer, boolean) RETURNS real[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- halfvec casts + +CREATE CAST (halfvec AS halfvec) + WITH FUNCTION halfvec(halfvec, integer, boolean) AS IMPLICIT; + +CREATE CAST (halfvec AS vector) + WITH FUNCTION halfvec_to_vector(halfvec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (vector AS halfvec) + WITH FUNCTION vector_to_halfvec(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (halfvec AS real[]) + WITH FUNCTION halfvec_to_float4(halfvec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (integer[] AS halfvec) + WITH FUNCTION array_to_halfvec(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS halfvec) + WITH FUNCTION array_to_halfvec(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS halfvec) + WITH FUNCTION array_to_halfvec(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST 
(numeric[] AS halfvec) + WITH FUNCTION array_to_halfvec(numeric[], integer, boolean) AS ASSIGNMENT; + +-- halfvec operators + +CREATE OPERATOR <-> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR + ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_add, + COMMUTATOR = + +); + +CREATE OPERATOR - ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_sub +); + +CREATE OPERATOR * ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_mul, + COMMUTATOR = * +); + +CREATE OPERATOR || ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_concat +); + +CREATE OPERATOR < ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_gt, + COMMUTATOR = < , NEGATOR = <= , + 
RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- halfvec opclasses + +CREATE OPERATOR CLASS halfvec_ops + DEFAULT FOR TYPE halfvec USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 halfvec_cmp(halfvec, halfvec); + +CREATE OPERATOR CLASS halfvec_l2_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec), + FUNCTION 3 l2_distance(halfvec, halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_ip_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), + FUNCTION 4 l2_norm(halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_cosine_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <=> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 2 l2_norm(halfvec), + FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), + FUNCTION 4 l2_norm(halfvec), + FUNCTION 5 ivfflat_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_l2_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_ip_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_cosine_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <=> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), + FUNCTION 2 l2_norm(halfvec), + 
FUNCTION 3 hnsw_halfvec_support(internal); + +CREATE OPERATOR CLASS halfvec_l1_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(halfvec, halfvec), + FUNCTION 3 hnsw_halfvec_support(internal); + +-- bit functions + +CREATE FUNCTION hamming_distance(bit, bit) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION jaccard_distance(bit, bit) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- bit operators + +CREATE OPERATOR <~> ( + LEFTARG = bit, RIGHTARG = bit, PROCEDURE = hamming_distance, + COMMUTATOR = '<~>' +); + +CREATE OPERATOR <%> ( + LEFTARG = bit, RIGHTARG = bit, PROCEDURE = jaccard_distance, + COMMUTATOR = '<%>' +); + +-- bit opclasses + +CREATE OPERATOR CLASS bit_hamming_ops + FOR TYPE bit USING ivfflat AS + OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 hamming_distance(bit, bit), + FUNCTION 3 hamming_distance(bit, bit), + FUNCTION 5 ivfflat_bit_support(internal); + +CREATE OPERATOR CLASS bit_hamming_ops + FOR TYPE bit USING hnsw AS + OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 hamming_distance(bit, bit), + FUNCTION 3 hnsw_bit_support(internal); + +CREATE OPERATOR CLASS bit_jaccard_ops + FOR TYPE bit USING hnsw AS + OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 jaccard_distance(bit, bit), + FUNCTION 3 hnsw_bit_support(internal); + +--- sparsevec type + +CREATE TYPE sparsevec; + +CREATE FUNCTION sparsevec_in(cstring, oid, integer) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_out(sparsevec) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_recv(internal, oid, integer) RETURNS sparsevec + AS 
'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_send(sparsevec) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE sparsevec ( + INPUT = sparsevec_in, + OUTPUT = sparsevec_out, + TYPMOD_IN = sparsevec_typmod_in, + RECEIVE = sparsevec_recv, + SEND = sparsevec_send, + STORAGE = external +); + +-- sparsevec functions + +CREATE FUNCTION l2_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l1_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_norm(sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME', 'sparsevec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l2_normalize(sparsevec) RETURNS sparsevec + AS 'MODULE_PATHNAME', 'sparsevec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- sparsevec private functions + +CREATE FUNCTION sparsevec_lt(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_le(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_eq(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_ne(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_ge(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' 
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_gt(sparsevec, sparsevec) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_cmp(sparsevec, sparsevec) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_l2_squared_distance(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_negative_inner_product(sparsevec, sparsevec) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- sparsevec cast functions + +CREATE FUNCTION sparsevec(sparsevec, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_sparsevec(vector, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION sparsevec_to_halfvec(sparsevec, integer, boolean) RETURNS halfvec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(integer[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(real[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(double precision[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(numeric[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; 
+ +-- sparsevec casts + +CREATE CAST (sparsevec AS sparsevec) + WITH FUNCTION sparsevec(sparsevec, integer, boolean) AS IMPLICIT; + +CREATE CAST (sparsevec AS vector) + WITH FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (vector AS sparsevec) + WITH FUNCTION vector_to_sparsevec(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (sparsevec AS halfvec) + WITH FUNCTION sparsevec_to_halfvec(sparsevec, integer, boolean) AS ASSIGNMENT; + +CREATE CAST (halfvec AS sparsevec) + WITH FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) AS IMPLICIT; + +CREATE CAST (integer[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(numeric[], integer, boolean) AS ASSIGNMENT; + +-- sparsevec operators + +CREATE OPERATOR <-> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR <+> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + +CREATE OPERATOR < ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarlesel, JOIN = scalarlejoinsel +); + +CREATE OPERATOR = ( + 
LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargesel, JOIN = scalargejoinsel +); + +CREATE OPERATOR > ( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_gt, + COMMUTATOR = < , NEGATOR = <= , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- sparsevec opclasses + +CREATE OPERATOR CLASS sparsevec_ops + DEFAULT FOR TYPE sparsevec USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 sparsevec_cmp(sparsevec, sparsevec); + +CREATE OPERATOR CLASS sparsevec_l2_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_ip_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_cosine_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <=> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec), + FUNCTION 2 l2_norm(sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); + +CREATE OPERATOR CLASS sparsevec_l1_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 <+> (sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(sparsevec, sparsevec), + FUNCTION 3 hnsw_sparsevec_support(internal); diff --git 
a/packages/pgvector-embedded/prebuilt/linux-x64/vector.control b/packages/pgvector-embedded/prebuilt/linux-x64/vector.control new file mode 100644 index 000000000..2ad02286a --- /dev/null +++ b/packages/pgvector-embedded/prebuilt/linux-x64/vector.control @@ -0,0 +1,4 @@ +comment = 'vector data type and ivfflat and hnsw access methods' +default_version = '0.8.1' +module_pathname = '$libdir/vector' +relocatable = true diff --git a/packages/pgvector-embedded/prebuilt/linux-x64/vector.so b/packages/pgvector-embedded/prebuilt/linux-x64/vector.so new file mode 100644 index 000000000..605bb999f Binary files /dev/null and b/packages/pgvector-embedded/prebuilt/linux-x64/vector.so differ
diff --git a/packages/pgvector-embedded/scripts/build.sh b/packages/pgvector-embedded/scripts/build.sh
new file mode 100755
index 000000000..b94ca7637
--- /dev/null
+++ b/packages/pgvector-embedded/scripts/build.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+# Build the pgvector artifact for the current platform and stage it under
+# prebuilt/<platform>/.
+#
+# embedded-postgres ships PostgreSQL 18.x with NO pg_config and NO server
+# headers, so pgvector cannot be compiled against it directly. Instead we build
+# against a separately-installed PostgreSQL of the SAME MAJOR version (18.x) and
+# rely on the extension ABI being stable within a major — a library built
+# against any 18.x loads into embedded-postgres's 18.x. (Validated locally:
+# Homebrew PG 18.1's vector.dylib loaded into embedded-postgres PG 18.3.)
+#
+# Requirements (provided per CI matrix cell):
+#   - pg_config for PostgreSQL 18 on PATH, or PG_CONFIG pointing at it
+#   - a C toolchain (make + cc)
+#
+# Usage:
+#   PGVECTOR_VERSION=v0.8.1 packages/pgvector-embedded/scripts/build.sh
+set -euo pipefail
+
+PGVECTOR_VERSION="${PGVECTOR_VERSION:-v0.8.1}"
+PG_CONFIG="${PG_CONFIG:-pg_config}"
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PKG_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+# Platform key must match embedded-postgres's package suffix and Node's
+# process.platform-process.arch (darwin-arm64, darwin-x64, linux-x64, linux-arm64).
+node_platform() { node -e 'process.stdout.write(`${process.platform}-${process.arch}`)'; }
+PLATFORM="${PLATFORM:-$(node_platform)}"
+OUT_DIR="${PKG_ROOT}/prebuilt/${PLATFORM}"
+
+PG_MAJOR="$("${PG_CONFIG}" --version | sed -E 's/^PostgreSQL ([0-9]+).*/\1/')"
+if [[ "${PG_MAJOR}" != "18" ]]; then
+  echo "ERROR: pg_config reports PostgreSQL ${PG_MAJOR}, expected 18 (embedded-postgres major). Set PG_CONFIG." >&2
+  exit 1
+fi
+
+echo "==> pgvector ${PGVECTOR_VERSION} for ${PLATFORM} against $("${PG_CONFIG}" --version)"
+
+WORK="$(mktemp -d)"
+trap 'rm -rf "${WORK}"' EXIT
+git clone --depth 1 --branch "${PGVECTOR_VERSION}" https://github.com/pgvector/pgvector.git "${WORK}/pgvector"
+
+# OPTFLAGS="" strips pgvector's default `-march=native`. These artifacts are
+# redistributed and loaded on arbitrary user CPUs, so a binary tuned to the CI
+# runner's microarchitecture would SIGILL on older hardware. Build for the
+# baseline target instead; the perf delta is negligible next to portability.
+make -C "${WORK}/pgvector" PG_CONFIG="${PG_CONFIG}" OPTFLAGS=""
+
+PKGLIBDIR="$("${PG_CONFIG}" --pkglibdir)"
+
+rm -rf "${OUT_DIR}"
+mkdir -p "${OUT_DIR}"
+
+# Stage straight from the build dir — we run `make` but not `make install`, so
+# nothing lands in the OS Postgres's pkglibdir/sharedir. The library, control
+# file, and generated SQL all sit under the cloned/built pgvector tree.
+#
+# Compiled extension library — name differs per platform ($(DLSUFFIX)).
+cp "${WORK}/pgvector/vector"*.so "${OUT_DIR}/" 2>/dev/null || true
+cp "${WORK}/pgvector/vector"*.dylib "${OUT_DIR}/" 2>/dev/null || true
+# Fall back to an installed copy only if the build dir somehow lacks the lib.
+if ! ls "${OUT_DIR}"/vector.* >/dev/null 2>&1; then
+  echo "WARN: no library in the build tree; copying ${PKGLIBDIR}/vector.* (may not be pgvector ${PGVECTOR_VERSION})" >&2
+  cp "${PKGLIBDIR}/vector".* "${OUT_DIR}/"
+fi
+
+# Control + the full-install SQL for the pinned version only. CREATE EXTENSION
+# at default_version reads vector--<version>.sql directly; the vector--A--B.sql
+# upgrade scripts are only for ALTER EXTENSION ... UPDATE, which never runs on a
+# fresh embedded DB, so we don't ship them. `vector.control` is checked into the
+# repo; `sql/vector--<version>.sql` is generated by `make`.
+PGVECTOR_SQL_VERSION="${PGVECTOR_VERSION#v}"
+cp "${WORK}/pgvector/vector.control" "${OUT_DIR}/"
+cp "${WORK}/pgvector/sql/vector--${PGVECTOR_SQL_VERSION}.sql" "${OUT_DIR}/"
+
+echo "==> staged $(ls "${OUT_DIR}" | wc -l | tr -d ' ') files in ${OUT_DIR}"
+ls -1 "${OUT_DIR}" | sed 's/^/ /'
diff --git a/packages/pgvector-embedded/src/index.ts b/packages/pgvector-embedded/src/index.ts new file mode 100644 index 000000000..0f6bd0dc2 --- /dev/null +++ b/packages/pgvector-embedded/src/index.ts @@ -0,0 +1,98 @@ +/** + * @lobu/pgvector-embedded + * + * `embedded-postgres` ships vanilla PostgreSQL binaries with no pgvector. This + * package carries small prebuilt pgvector artifacts (the compiled extension + * library + its `.control` / `.sql` files) for each platform `embedded-postgres` + * supports, and injects the host platform's artifact into the live + * `@embedded-postgres//native` tree so `CREATE EXTENSION vector` + * resolves at runtime. + * + * Artifacts are built by `scripts/build.sh` (one platform per CI matrix cell) + * against a same-major PostgreSQL — the extension ABI is stable within a major, + * so a library built against PG 18.x loads into `embedded-postgres`'s PG 18.x.
+ */
+import { cpSync, existsSync, mkdirSync, readdirSync } from "node:fs";
+import { createRequire } from "node:module";
+import { dirname, join } from "node:path";
+import { fileURLToPath } from "node:url";
+
+const require = createRequire(import.meta.url);
+const PACKAGE_ROOT = join(dirname(fileURLToPath(import.meta.url)), "..");
+const PREBUILT_ROOT = join(PACKAGE_ROOT, "prebuilt");
+
+/** Platform key matching `embedded-postgres`'s package suffixes (`darwin-arm64`, `linux-x64`, …). */
+export function currentPlatformKey(): string {
+  return `${process.platform}-${process.arch}`;
+}
+
+/** Directory holding the prebuilt pgvector files for a platform. */
+export function prebuiltDir(platform: string = currentPlatformKey()): string {
+  return join(PREBUILT_ROOT, platform);
+}
+
+/** Whether a usable prebuilt pgvector artifact exists for the platform. */
+export function hasPrebuilt(platform: string = currentPlatformKey()): boolean {
+  return existsSync(join(prebuiltDir(platform), "vector.control"));
+}
+
+/**
+ * Resolve the `native` directory of the installed `@embedded-postgres/<platform>`
+ * package (the one that holds `bin/`, `lib/`, `share/`). Throws with an
+ * actionable message if the platform binary package isn't installed.
+ */
+export function resolveEmbeddedNativeDir(
+  platform: string = currentPlatformKey()
+): string {
+  let entry: string;
+  try {
+    // The platform package uses a string `exports` ("./dist/index.js"), so only
+    // the package root resolves; walk up from there to `native`.
+    entry = require.resolve(`@embedded-postgres/${platform}`);
+  } catch {
+    throw new Error(
+      `@lobu/pgvector-embedded: @embedded-postgres/${platform} is not installed. ` +
+        "Install embedded-postgres so its host-platform binary package is present."
+    );
+  }
+  return join(dirname(entry), "..", "native");
+}
+
+/**
+ * Copy the host platform's prebuilt pgvector files into an embedded-postgres
+ * `native` tree so `CREATE EXTENSION vector` works. Idempotent: returns early
+ * when `vector.control` is already there, so that file is copied LAST — an
+ * interrupted injection is redone rather than mistaken for a complete one.
+ *
+ * @param nativeDir absolute path to `.../native`; defaults to the resolved
+ * host-platform `@embedded-postgres` package.
+ * @param platform prebuilt artifact to inject; throws if none exists for it.
+ */
+export function injectPgvector(
+  nativeDir: string = resolveEmbeddedNativeDir(),
+  platform: string = currentPlatformKey()
+): void {
+  const libDst = join(nativeDir, "lib", "postgresql");
+  const extDst = join(nativeDir, "share", "postgresql", "extension");
+  const marker = "vector.control";
+
+  // Already injected (or shipped) — nothing to do.
+  if (existsSync(join(extDst, marker))) return;
+
+  if (!hasPrebuilt(platform)) {
+    throw new Error(
+      `@lobu/pgvector-embedded: no prebuilt pgvector for "${platform}". ` +
+        "Run scripts/build.sh for this platform, or set DATABASE_URL to use an external Postgres."
+    );
+  }
+
+  const src = prebuiltDir(platform);
+  mkdirSync(libDst, { recursive: true });
+  mkdirSync(extDst, { recursive: true });
+  // Library (vector.so / vector.dylib) → lib/postgresql; the rest → share.
+  const names = readdirSync(src).filter((f) => f.startsWith("vector"));
+  for (const file of [...names.filter((f) => f !== marker), marker]) {
+    const isLib = file.endsWith(".so") || file.endsWith(".dylib");
+    cpSync(join(src, file), join(isLib ? libDst : extDst, file));
+  }
+}
diff --git a/packages/pgvector-embedded/tsconfig.json b/packages/pgvector-embedded/tsconfig.json new file mode 100644 index 000000000..8987ab683 --- /dev/null +++ b/packages/pgvector-embedded/tsconfig.json @@ -0,0 +1,20 @@ +{ + "compilerOptions": { + "module": "ESNext", + "target": "ES2022", + "moduleResolution": "bundler", + "esModuleInterop": true, + "skipLibCheck": true, + "strict": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "outDir": "./dist", + "resolveJsonModule": true, + "allowSyntheticDefaultImports": true, + "forceConsistentCasingInFileNames": true, + "noEmit": false + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "**/__tests__/**", "**/*.test.ts"] +} diff --git a/packages/server/package.json b/packages/server/package.json index c12ad5fe5..fff83dd33 100644 --- a/packages/server/package.json +++ b/packages/server/package.json @@ -10,12 +10,11 @@ }, "scripts": { "dev": "tsx watch --ignore=../web/** --ignore=../owletto/** --ignore=../../node_modules/** src/server.ts", - "dev:local": "tsx watch --ignore=../web/** --ignore=../owletto/** --ignore=../../node_modules/** src/start-local.ts", + "dev:local": "tsx watch --ignore=../web/** --ignore=../owletto/** --ignore=../../node_modules/** src/server.ts", "start": "tsx src/server.ts", "build:server": "node ./scripts/build-server-bundle.mjs", "test": "vitest", "test:gateway": "bun test src/gateway", - "test:pglite": "LOBU_TEST_BACKEND=pglite vitest", "test:sandbox-runtime": "SKIP_TEST_DB_SETUP=1 vitest run src/__tests__/integration/sandbox/run-script-runtime.test.ts", "typecheck": "tsc --noEmit", "lint": "biome lint src", @@ -67,10 +66,9 @@ "zod": "^4.4.0" }, "devDependencies": { - "@electric-sql/pglite": "^0.4.3", - "@electric-sql/pglite-postgis": "^0.0.7", - "@electric-sql/pglite-socket": "^0.1.3", + "@lobu/pgvector-embedded": "workspace:*", "@types/react": "^19.2.14", + "embedded-postgres":
"18.3.0-beta.17", "@vitest/coverage-v8": "^2.1.8", "dotenv": "^16.4.5", "esbuild": "^0.27.0", diff --git a/packages/server/scripts/build-server-bundle.mjs b/packages/server/scripts/build-server-bundle.mjs index 303111b52..c51695cb1 100644 --- a/packages/server/scripts/build-server-bundle.mjs +++ b/packages/server/scripts/build-server-bundle.mjs @@ -1,7 +1,7 @@ #!/usr/bin/env node /** - * Bundle production and local PGlite server entrypoints into standalone ESM - * files consumed by the published @lobu/cli package. + * Bundle the single server entrypoint into a standalone ESM file consumed by + * the published @lobu/cli package. One entry (server.ts) serves both backends. * * Why: prod runs under Node so isolated-vm (V8 native addon) loads. Running * the TS source through tsx exposes Node's CJS↔ESM lexer interop with @@ -46,6 +46,11 @@ const commonOptions = { if (args.kind === 'entry-point') return null; const id = args.path; if (id.startsWith('.') || id.startsWith('/')) return null; + // @lobu/pgvector-embedded ships prebuilt binary assets under + // prebuilt/ that esbuild can't inline; keep it external so it loads + // from node_modules with its assets intact (like the npm externals). + if (id === '@lobu/pgvector-embedded' || id.startsWith('@lobu/pgvector-embedded/')) + return { external: true }; if (id.startsWith('@lobu/')) return null; return { external: true }; }); @@ -81,8 +86,10 @@ async function buildBundle(entryPoint, outfile) { console.log(` warnings: ${result.warnings.length}, errors: ${result.errors.length}`); } +// Single entry for both backends: server.ts branches on DATABASE_URL +// (postgres:// = external; path/file:// = embedded, lazy-loading the embedded +// Postgres runtime so the external/prod path never resolves that binary). 
await buildBundle('src/server.ts', 'dist/server.bundle.mjs'); -await buildBundle('src/start-local.ts', 'dist/start-local.bundle.mjs'); const connectorsSrc = join(pkgDir, '..', 'connectors', 'src'); const connectorsDest = join(pkgDir, 'dist', 'connectors'); diff --git a/packages/server/src/__tests__/integration/auth/single-user-signup.test.ts b/packages/server/src/__tests__/integration/auth/single-user-signup.test.ts index 1634ef022..c368b3905 100644 --- a/packages/server/src/__tests__/integration/auth/single-user-signup.test.ts +++ b/packages/server/src/__tests__/integration/auth/single-user-signup.test.ts @@ -10,17 +10,14 @@ * refused with SIGN_UP_DISABLED_IN_SINGLE_USER_MODE. * * 2. The guard does not deadlock. Sign-up runs inside Better Auth's - * runWithTransaction, which reserves the only pooled connection in - * PGlite mode (LOBU_DISABLE_PREPARE=1 → pool max=1). The hook must - * reuse that transaction connection via ctx.internalAdapter rather - * than asking getDb() for a second one. Run under - * `bun run test:pglite` this test reproduces issue #947: a regression - * to a fresh getDb() query hangs the request and fails on timeout. + * runWithTransaction, which reserves a pooled connection. The hook must + * reuse that transaction connection via ctx.internalAdapter rather than + * asking getDb() for a second one — issue #947, where a regression to a + * fresh getDb() query hung the request and failed on timeout. * * The test is backend-agnostic — it talks to the auth handler over a - * Request, reads DATABASE_URL like the rest of the suite, and so runs - * unchanged against external Postgres (default) and PGlite - * (LOBU_TEST_BACKEND=pglite). + * Request and reads DATABASE_URL like the rest of the suite, so it runs + * unchanged against any Postgres backend. 
*/ import { verifyPassword } from "better-auth/crypto"; diff --git a/packages/server/src/__tests__/integration/geo-enrichment.test.ts b/packages/server/src/__tests__/integration/geo-enrichment.test.ts index 79187008e..3e38610d1 100644 --- a/packages/server/src/__tests__/integration/geo-enrichment.test.ts +++ b/packages/server/src/__tests__/integration/geo-enrichment.test.ts @@ -1,8 +1,8 @@ /** - * Geo enrichment — integration coverage against a real PostGIS-enabled - * PGlite. The pglite-backend test setup loads `@electric-sql/pglite-postgis` - * so the migration's DO block falls through to the real path, and the - * `geo_lookup(lat, lng)` SQL function runs against actual geography data. + * Geo enrichment — integration coverage for the `geo_lookup(lat, lng)` SQL + * function. It's created by the geo migration on cube + earthdistance (core + * contrib, no PostGIS), so it exists on every backend and this suite runs + * against the standard test database. * * The fixture is intentionally tiny (3 countries, 3 admin1 regions, 5 * cities) — just enough to prove: @@ -26,13 +26,10 @@ import { insertEvent } from '../../utils/insert-event'; import { getTestDb } from '../setup/test-db'; import { createTestOrganization } from '../setup/test-fixtures'; -// PostGIS isn't installable on every test backend — real-Postgres CI -// runners run with a plain Postgres image, so the geo migration's DO -// block bails out and `geo_lookup` never gets created. PGlite is -// configured with @electric-sql/pglite-postgis (see pglite-backend.ts), -// so the function IS available there. Probe once at module load and -// gate the whole suite — unit tests in utils/__tests__/geo-enrichment -// already cover the fail-open behaviour with stubs. +// Probe once at module load that the geo migration created `geo_lookup`, and +// gate the suite on it (defensive — a stripped-down DB without the geo +// migration just skips, rather than erroring). 
Unit tests in +// utils/__tests__/geo-enrichment cover the fail-open behaviour with stubs. // // The probe deliberately does NOT swallow query errors: a real DB // connection / setup failure should fail the run, not silently skip @@ -130,7 +127,6 @@ async function seedFixture(): Promise { `; } for (const p of FIXTURES.places) { - // `location` is a generated column — we never insert into it. await sql` INSERT INTO geo_places ( geonameid, name, ascii_name, latitude, longitude, @@ -146,13 +142,8 @@ async function seedFixture(): Promise { } describe.runIf(hasGeoSchema)('geo enrichment (integration)', () => { - // Deliberately NOT calling cleanupTestDatabase(): that helper TRUNCATEs - // every table in public schema, including `spatial_ref_sys` — wiping the - // 8500 SRS rows pglite-postgis populates at CREATE EXTENSION time. Once - // SRID 4326 disappears, ST_Distance + every geography op throws - // "Cannot find SRID (4326) in spatial_ref_sys". Vitest gives each test - // file a fresh PGlite anyway, and seedFixture handles geo-table isolation - // per-test, so we don't need a broader cleanup. + // seedFixture TRUNCATEs the geo tables per-test, so we don't need a broader + // cleanupTestDatabase() here. beforeEach(async () => { _resetGeoEnrichmentProbeForTests(); await seedFixture(); diff --git a/packages/server/src/__tests__/integration/identity/engine.test.ts b/packages/server/src/__tests__/integration/identity/engine.test.ts index e5ebc84e5..1883868d8 100644 --- a/packages/server/src/__tests__/integration/identity/engine.test.ts +++ b/packages/server/src/__tests__/integration/identity/engine.test.ts @@ -3,7 +3,7 @@ * * Covers UC1 / UC4 / UC5 / UC6 / UC8 / UC10 from the F1 design plan plus * basic schema-validation guards. Each test runs against a freshly-cleaned - * test DB; the pglite backend is fast enough that the per-test setup cost + * test DB; the embedded backend is fast enough that the per-test setup cost * is acceptable. 
* * The engine's job is narrow: given a `$member` and a batch of facts, diff --git a/packages/server/src/__tests__/integration/sandbox/namespace-dispatch.test.ts b/packages/server/src/__tests__/integration/sandbox/namespace-dispatch.test.ts index 7536b223d..f054b14f8 100644 --- a/packages/server/src/__tests__/integration/sandbox/namespace-dispatch.test.ts +++ b/packages/server/src/__tests__/integration/sandbox/namespace-dispatch.test.ts @@ -25,15 +25,6 @@ const testEnv: Env = { DATABASE_URL: process.env.DATABASE_URL, }; -/** - * Some handlers compose postgres.js tagged-template fragments - * (`sql\`${query} ORDER BY ...\``). PGlite's socket shim treats the fragment - * as a parameter instead of inlining, which produces "Promise" as $1 and a - * syntax error. Those dispatches work on real Postgres. The test suite runs - * under PGlite by default (fast, zero-deps) so we skip those cases here. - */ -const IS_PGLITE = process.env.LOBU_TEST_BACKEND === "pglite"; -const pgOnlyIt = IS_PGLITE ? 
it.skip : it; describe("ClientSDK namespace dispatch (read paths)", () => { let sdk: ClientSDK; @@ -62,11 +53,11 @@ describe("ClientSDK namespace dispatch (read paths)", () => { sdk = buildClientSDK(ctx, testEnv); }); - pgOnlyIt("entities.list dispatches cleanly", async () => { + it("entities.list dispatches cleanly", async () => { await expect(sdk.entities.list()).resolves.toBeDefined(); }); - pgOnlyIt("entitySchema.listTypes dispatches cleanly", async () => { + it("entitySchema.listTypes dispatches cleanly", async () => { await expect(sdk.entitySchema.listTypes()).resolves.toBeDefined(); }); @@ -74,11 +65,11 @@ describe("ClientSDK namespace dispatch (read paths)", () => { await expect(sdk.entitySchema.listRelTypes()).resolves.toBeDefined(); }); - pgOnlyIt("connections.list dispatches cleanly", async () => { + it("connections.list dispatches cleanly", async () => { await expect(sdk.connections.list()).resolves.toBeDefined(); }); - pgOnlyIt( + it( "connections.listConnectorDefinitions dispatches cleanly", async () => { await expect( @@ -87,42 +78,41 @@ describe("ClientSDK namespace dispatch (read paths)", () => { }, ); - pgOnlyIt("feeds.list dispatches cleanly", async () => { + it("feeds.list dispatches cleanly", async () => { await expect(sdk.feeds.list()).resolves.toBeDefined(); }); - pgOnlyIt("authProfiles.list dispatches cleanly", async () => { + it("authProfiles.list dispatches cleanly", async () => { await expect(sdk.authProfiles.list()).resolves.toBeDefined(); }); - pgOnlyIt("operations.listAvailable dispatches cleanly", async () => { + it("operations.listAvailable dispatches cleanly", async () => { await expect(sdk.operations.listAvailable()).resolves.toBeDefined(); }); - // NOTE: operations.listRuns trips a pre-existing handler bug on PGlite - // (un-awaited SQL fragment injected as $1). Covered separately once that - // handler is fixed — the wrapper dispatch itself is asserted by the - // listAvailable test above. 
+ // NOTE: the wrapper dispatch itself is asserted by the listAvailable test + // above; operations.listRuns result-shape is covered by the operations + // suite, not duplicated here. - pgOnlyIt("watchers.list dispatches cleanly", async () => { + it("watchers.list dispatches cleanly", async () => { await expect(sdk.watchers.list()).resolves.toBeDefined(); }); - pgOnlyIt("classifiers.list dispatches cleanly", async () => { + it("classifiers.list dispatches cleanly", async () => { await expect(sdk.classifiers.list()).resolves.toBeDefined(); }); - pgOnlyIt("organizations.list dispatches cleanly", async () => { + it("organizations.list dispatches cleanly", async () => { const orgs = await sdk.organizations.list(); expect(Array.isArray(orgs)).toBe(true); }); - pgOnlyIt("organizations.current returns the session org", async () => { + it("organizations.current returns the session org", async () => { const current = await sdk.organizations.current(); expect(current.slug).toBe("dispatch-sdk"); }); - pgOnlyIt("knowledge.search dispatches cleanly", async () => { + it("knowledge.search dispatches cleanly", async () => { await expect( sdk.knowledge.search({ query: "nothing-here-likely" }), ).resolves.toBeDefined(); diff --git a/packages/server/src/__tests__/server-lifecycle.test.ts b/packages/server/src/__tests__/server-lifecycle.test.ts index 65ea9a4ba..5f89c7a7e 100644 --- a/packages/server/src/__tests__/server-lifecycle.test.ts +++ b/packages/server/src/__tests__/server-lifecycle.test.ts @@ -2,7 +2,7 @@ * Contract tests for the shared server lifecycle spine. * * The point of these tests is to lock the invariants that drift between - * `server.ts` (Postgres) and `start-local.ts` (PGlite) used to break (issue + * `server.ts` (Postgres) and `start-local.ts` (embedded Postgres) used to break (issue * #948 + the #943 7-hygiene catch-up): * * 1. 
Middleware ordering on the Hono wrapper:
diff --git a/packages/server/src/__tests__/setup/embedded-postgres-backend.ts b/packages/server/src/__tests__/setup/embedded-postgres-backend.ts
new file mode 100644
index 000000000..ef6f5d04f
--- /dev/null
+++ b/packages/server/src/__tests__/setup/embedded-postgres-backend.ts
@@ -0,0 +1,84 @@
+/**
+ * Ephemeral embedded-Postgres backend for tests.
+ *
+ * Spawns a real PostgreSQL 18 (embedded-postgres) on a throwaway datadir and
+ * returns a plain DATABASE_URL any postgres.js client can use — so `make test`
+ * needs no external Postgres, exactly like `lobu run`. Same binary + pgvector
+ * injection as the production embedded path (src/embedded-runtime.ts), so tests
+ * exercise the real engine (prepared statements, multi-conn pool, LISTEN/NOTIFY,
+ * cube/earthdistance, pgvector) with no PGlite-specific quirks.
+ */
+
+import { mkdtempSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { injectPgvector, resolveEmbeddedNativeDir } from '@lobu/pgvector-embedded';
+import EmbeddedPostgres from 'embedded-postgres';
+
+export interface EmbeddedBackend {
+  /** Connection string for the running instance. */
+  url: string;
+  /** Stops the server and removes the throwaway datadir. */
+  stop: () => Promise<void>;
+}
+
+let active: EmbeddedBackend | null = null;
+// In-flight startup, shared so concurrent callers cannot spawn two clusters.
+let starting: Promise<EmbeddedBackend> | null = null;
+
+/**
+ * Start an ephemeral embedded Postgres and return a connectable DATABASE_URL.
+ * Idempotent: repeated (even concurrent) calls return the same instance until
+ * `stop()` runs. A failed startup is not cached, so a later call retries.
+ */
+export async function startEmbeddedBackend(): Promise<EmbeddedBackend> {
+  if (active) return active;
+  if (!starting) {
+    starting = launch().finally(() => {
+      starting = null;
+    });
+  }
+  return starting;
+}
+
+/** Inject pgvector, boot a fresh cluster on a temp datadir, and publish it as `active`. */
+async function launch(): Promise<EmbeddedBackend> {
+  injectPgvector(resolveEmbeddedNativeDir());
+
+  const dataDir = mkdtempSync(join(tmpdir(), 'lobu-test-pg-'));
+  // 0 lets the OS assign; embedded-postgres needs a concrete port, so pick a
+  // high random one and let a collision fail loudly rather than silently share.
+  const port = 50000 + Math.floor(Math.random() * 10000);
+  const pg = new EmbeddedPostgres({
+    databaseDir: dataDir,
+    user: 'postgres',
+    password: 'postgres',
+    port,
+    persistent: false,
+  });
+
+  try {
+    await pg.initialise();
+    await pg.start();
+  } catch (err) {
+    // Startup failed: stop whatever came up (best effort) and drop the throwaway
+    // datadir so failed runs don't pile up under the OS temp directory.
+    await pg.stop().catch(() => {});
+    rmSync(dataDir, { recursive: true, force: true });
+    throw err;
+  }
+
+  const url = `postgresql://postgres:postgres@127.0.0.1:${port}/postgres?sslmode=disable`;
+  active = {
+    url,
+    stop: async () => {
+      try {
+        await pg.stop();
+      } finally {
+        rmSync(dataDir, { recursive: true, force: true });
+        active = null;
+      }
+    },
+  };
+  return active;
+}
diff --git a/packages/server/src/__tests__/setup/global-setup.ts b/packages/server/src/__tests__/setup/global-setup.ts index 8e367e19c..6fb7935f6 100644 --- a/packages/server/src/__tests__/setup/global-setup.ts +++ b/packages/server/src/__tests__/setup/global-setup.ts @@ -1,36 +1,20 @@ /** * Global Test Setup * - * Runs once before all tests to set up the test database. Supports two - * interchangeable backends so the same integration tests can execute against - * either: + * Runs once before all tests. One backend story: + * - If DATABASE_URL is set → use that Postgres (CI, or a local one you pin). + * - Otherwise → spawn an ephemeral embedded Postgres (real PG 18 + pgvector), + * so `make test` needs no external database — same engine as `lobu run`. * - * - `postgres` (default) — external Postgres via DATABASE_URL. Matches the - * historical test contract; full-suite compatible. - * - `pglite` (opt-in via `pnpm test:pglite` / LOBU_TEST_BACKEND=pglite) — - * ephemeral in-memory PGlite + socket server. Zero external dependencies. - * Currently reliable for targeted runs (e.g. the PostgresSecretStore - * suite); the full integration suite under a single vitest worker still - * exhausts the PGlite socket's connection pool, so it's not yet the - * default. - * - * The rest of the test suite is backend-agnostic: it reads DATABASE_URL and - * uses postgres.js, so migrations, fixtures, and assertions are reused as-is.
+ * The suite is backend-agnostic: it reads DATABASE_URL and uses postgres.js, so + * migrations, fixtures, and assertions are identical either way. */ import { closeDbSingleton } from '../../db/client'; -import { type PgliteBackend, startPgliteBackend } from './pglite-backend'; +import { type EmbeddedBackend, startEmbeddedBackend } from './embedded-postgres-backend'; import { closeTestDb, setupTestDatabase } from './test-db'; -let pglite: PgliteBackend | null = null; - -function resolveBackend(): 'pglite' | 'postgres' { - const explicit = process.env.LOBU_TEST_BACKEND?.trim().toLowerCase(); - if (explicit === 'pglite' || explicit === 'postgres') return explicit; - // Default to external Postgres — matches the historical test contract. - // Opt into PGlite explicitly via `pnpm test:pglite`. - return 'postgres'; -} +let embedded: EmbeddedBackend | null = null; export async function setup(): Promise { if (process.env.SKIP_TEST_DB_SETUP === '1') { @@ -38,27 +22,16 @@ export async function setup(): Promise { return; } - const backend = resolveBackend(); - - if (backend === 'pglite') { - console.log('\n🧬 Starting ephemeral PGlite backend for tests...'); - pglite = await startPgliteBackend(); - process.env.DATABASE_URL = pglite.url; - // Matches the production embedded path in src/start-local.ts — the - // PGlite socket doesn't support SSL negotiation or prepared statements. - process.env.PGSSLMODE = 'disable'; - process.env.LOBU_DISABLE_PREPARE = '1'; - console.log(`✅ PGlite ready at ${pglite.url}`); - } else { - const databaseUrl = process.env.DATABASE_URL?.trim(); - if (!databaseUrl) { - throw new Error( - 'LOBU_TEST_BACKEND=postgres requires DATABASE_URL. 
' + - 'Example: DATABASE_URL=postgresql://postgres:postgres@127.0.0.1:5433/lobu_test' - ); - } + const databaseUrl = process.env.DATABASE_URL?.trim(); + if (databaseUrl) { process.env.DATABASE_URL = databaseUrl; - console.log(`\n🗄️ Using external Postgres at ${databaseUrl}`); + console.log(`\n🗄️ Using Postgres at ${databaseUrl}`); + } else { + console.log('\n🐘 No DATABASE_URL — spawning ephemeral embedded Postgres...'); + embedded = await startEmbeddedBackend(); + process.env.DATABASE_URL = embedded.url; + process.env.PGSSLMODE = 'disable'; + console.log(`✅ Embedded Postgres ready at ${embedded.url}`); } // Deterministic 32-byte hex key for AES-256-GCM in tests. Same value the @@ -77,8 +50,8 @@ export async function setup(): Promise { } export async function teardown(): Promise { - if (pglite) { - await pglite.stop(); - pglite = null; + if (embedded) { + await embedded.stop(); + embedded = null; } } diff --git a/packages/server/src/__tests__/setup/pglite-backend.ts b/packages/server/src/__tests__/setup/pglite-backend.ts deleted file mode 100644 index 19ca4c417..000000000 --- a/packages/server/src/__tests__/setup/pglite-backend.ts +++ /dev/null @@ -1,119 +0,0 @@ -/** - * Ephemeral PGlite backend for tests. - * - * Starts an in-memory PGlite instance fronted by PGLiteSocketServer so any - * postgres.js client (test code, app code, migrations) can talk to it via a - * plain DATABASE_URL. Lets the same integration tests run against either - * real Postgres or PGlite without branching in the test code itself. - * - * Shape mirrors the production embedded path in src/start-local.ts so the - * two stay behaviorally aligned (same extensions, same SSL/prepare flags). 
- */ - -import { PGlite } from '@electric-sql/pglite'; -import { pg_trgm } from '@electric-sql/pglite/contrib/pg_trgm'; -import { vector } from '@electric-sql/pglite/vector'; -import { postgis } from '@electric-sql/pglite-postgis'; -import { PGLiteSocketServer } from '@electric-sql/pglite-socket'; - -function readPositiveIntEnv(name: string, fallback: number): number { - const raw = process.env[name]?.trim(); - if (!raw) return fallback; - const parsed = Number.parseInt(raw, 10); - return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback; -} - -function isTruthyEnv(name: string): boolean { - return /^(1|true|yes|on)$/i.test(process.env[name]?.trim() ?? ''); -} - -const SOCKET_MAX_CONNECTIONS = readPositiveIntEnv('LOBU_PGLITE_SOCKET_MAX_CONNECTIONS', 64); -const SOCKET_IDLE_TIMEOUT_MS = readPositiveIntEnv('LOBU_PGLITE_SOCKET_IDLE_TIMEOUT_MS', 0); -const SOCKET_DEBUG = isTruthyEnv('LOBU_PGLITE_SOCKET_DEBUG'); - -export interface PgliteBackend { - url: string; - stop: () => Promise; -} - -let active: PgliteBackend | null = null; - -/** - * Start an ephemeral PGlite + socket server and return a DATABASE_URL any - * postgres.js client can connect to. Idempotent: repeated calls return the - * same instance until `stop()` runs. - */ -export async function startPgliteBackend(): Promise { - if (active) return active; - - const db = await PGlite.create({ - // No dataDir → purely in-memory; tests are hermetic and leave no trace. - // postgis is an experimental WASM bundle (@electric-sql/pglite-postgis, - // v0.0.7 at time of writing). We register it here so the - // geo-enrichment migration runs the full path under test instead of - // tripping the DO-block fallback that production self-hosters - // without PostGIS depend on. Keeps unit + integration coverage - // aligned with what prod actually executes. 
- extensions: { vector, pg_trgm, postgis }, - }); - - const socketServer = new PGLiteSocketServer({ - db, - port: 0, // ephemeral; the listening event reports the real port - maxConnections: SOCKET_MAX_CONNECTIONS, - idleTimeout: SOCKET_IDLE_TIMEOUT_MS, - debug: SOCKET_DEBUG, - }); - - socketServer.addEventListener('error', (event: Event) => { - const detail = (event as CustomEvent).detail; - console.error('[pglite-backend] socket server error', detail); - }); - socketServer.addEventListener('close', () => { - if (SOCKET_DEBUG) { - console.warn('[pglite-backend] socket server closed'); - } - }); - if (SOCKET_DEBUG) { - socketServer.addEventListener('connection', (event: Event) => { - const detail = (event as CustomEvent).detail; - console.log('[pglite-backend] socket connection', { - detail, - stats: socketServer.getStats(), - }); - }); - } - - const port = await new Promise((resolve, reject) => { - const timer = setTimeout( - () => reject(new Error('PGlite socket server did not start within 10s')), - 10_000 - ); - socketServer.addEventListener('listening', (event: Event) => { - clearTimeout(timer); - const detail = (event as CustomEvent).detail as { port?: number } | undefined; - if (typeof detail?.port === 'number') { - resolve(detail.port); - } else { - reject(new Error('PGlite listening event missing port')); - } - }); - void socketServer.start(); - }); - - // sslmode=disable is required — the socket doesn't speak SSL. 
- const url = `postgresql://postgres@127.0.0.1:${port}/postgres?sslmode=disable`; - - active = { - url, - stop: async () => { - try { - await socketServer.stop(); - } finally { - await db.close(); - active = null; - } - }, - }; - return active; -} diff --git a/packages/server/src/__tests__/setup/test-db.ts b/packages/server/src/__tests__/setup/test-db.ts index 631b7b77f..62eebd845 100644 --- a/packages/server/src/__tests__/setup/test-db.ts +++ b/packages/server/src/__tests__/setup/test-db.ts @@ -59,15 +59,9 @@ export function getTestDb(): postgres.Sql { 'Example: DATABASE_URL=postgresql://localhost:5432/lobu_test' ); } - // The PGlite socket server is happiest with very few, short-lived - // connections (same reason `db/client.ts` pins the embedded pool to 1). - // A 5-connection pool that lingers 20s after idle churns the socket and - // has been observed to drop connections mid-suite (ECONNRESET in the - // cleanup hook). Against real Postgres the wider pool is fine. - const isPglite = process.env.LOBU_DISABLE_PREPARE === '1'; sql = postgres(url, { - max: isPglite ? 1 : 5, - idle_timeout: isPglite ? 0 : 20, + max: 5, + idle_timeout: 20, // Integration tests trigger many CASCADE/TRUNCATE notices; suppress them to // reduce noisy output and hook slowdowns. 
onnotice: () => {}, diff --git a/packages/server/src/benchmarks/memory/adapters/lobu-inprocess.ts b/packages/server/src/benchmarks/memory/adapters/lobu-inprocess.ts index cd23ea294..86a673149 100644 --- a/packages/server/src/benchmarks/memory/adapters/lobu-inprocess.ts +++ b/packages/server/src/benchmarks/memory/adapters/lobu-inprocess.ts @@ -1,5 +1,5 @@ import { performance } from 'node:perf_hooks'; -import { type PgliteBackend, startPgliteBackend } from '../../../__tests__/setup/pglite-backend'; +import { type EmbeddedBackend, startEmbeddedBackend } from '../../../__tests__/setup/embedded-postgres-backend'; import { cleanupTestDatabase, getTestDb, @@ -119,10 +119,10 @@ function splitConversationSession(content: string): SplitConversation | null { return { header, turns }; } -let pgliteBackend: PgliteBackend | null = null; +let embeddedBackend: EmbeddedBackend | null = null; let databaseReady = false; -async function ensurePglite(): Promise { +async function ensureDatabase(): Promise { const benchmarkDatabaseUrl = process.env.LOBU_BENCHMARK_DATABASE_URL?.trim(); if (benchmarkDatabaseUrl) { @@ -131,11 +131,10 @@ async function ensurePglite(): Promise { process.env.ENCRYPTION_KEY = process.env.ENCRYPTION_KEY ?? '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef'; - } else if (!pgliteBackend) { - pgliteBackend = await startPgliteBackend(); - process.env.DATABASE_URL = pgliteBackend.url; + } else if (!embeddedBackend) { + embeddedBackend = await startEmbeddedBackend(); + process.env.DATABASE_URL = embeddedBackend.url; process.env.PGSSLMODE = 'disable'; - process.env.LOBU_DISABLE_PREPARE = '1'; process.env.ENCRYPTION_KEY = process.env.ENCRYPTION_KEY ?? 
'0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef'; @@ -188,7 +187,7 @@ export class LobuInprocessBenchmarkAdapter implements BenchmarkAdapter { } async reset(_ctx: TrialContext): Promise { - await ensurePglite(); + await ensureDatabase(); await cleanupTestDatabase(); clearMcpSessions(); this.entityIds.clear(); @@ -339,13 +338,13 @@ export class LobuInprocessBenchmarkAdapter implements BenchmarkAdapter { } async dispose(): Promise { - if (!pgliteBackend) return; - // Close the postgres.js singleton pool BEFORE shutting down pglite's socket - // server. Otherwise idle connections in the pool outlive the socket and + if (!embeddedBackend) return; + // Close the postgres.js singleton pool BEFORE stopping the embedded + // Postgres. Otherwise idle connections in the pool outlive the server and // any follow-up query rejects with ECONNREFUSED as an unhandled rejection. await closeDbSingleton(); - await pgliteBackend.stop(); - pgliteBackend = null; + await embeddedBackend.stop(); + embeddedBackend = null; databaseReady = false; } diff --git a/packages/server/src/db/client.ts b/packages/server/src/db/client.ts index ce4b34334..aef6ea746 100644 --- a/packages/server/src/db/client.ts +++ b/packages/server/src/db/client.ts @@ -26,10 +26,6 @@ export interface DbClient { end?: () => Promise; } -export function simpleQuery(query: DbQuery): DbQuery { - return query; -} - /** * Format a JS string array as a PostgreSQL array literal. * @@ -100,28 +96,15 @@ const PG_OID_JSONB = 3802; // ========================================================= interface CreatedDbClient { - /** Client used by application code; in pglite mode this is queue-serialized. */ + /** Client used by application code. */ wrapped: DbClient; - /** Raw postgres.js Sql client. Bypasses the serialization queue — only safe - * for callers that own their own connection (e.g. Kysely via reserve()). 
*/ + /** Raw postgres.js Sql client (same instance as `wrapped`); named for the + * call sites that need the full Sql surface, e.g. Kysely via reserve(). */ raw: Sql; } function createDbClient(connectionString: string, maxConnections?: number): CreatedDbClient { - const embeddedCompatMode = process.env.LOBU_DISABLE_PREPARE === '1'; - - // PGlite's socket server can't safely interleave queries that postgres.js - // (and Kysely's reserve() path used by better-auth) pipeline across multiple - // connections — concurrent requests collide on the unnamed prepared statement - // and crash with "bind message supplies N parameters, but prepared statement - // requires M". Pin the embedded pool to a single connection so everything - // serializes. With a single connection, named prepared statements (postgres.js - // default) are safe again — and necessary: `prepare: false` mangles the - // dynamically-composed `sql` fragments used by tools like manage_connections - // into "syntax error at or near \"$1\"" on PGlite. - const poolMax = embeddedCompatMode - ? 1 - : (maxConnections ?? parseInt(process.env.DB_POOL_MAX || '20', 10)); + const poolMax = maxConnections ?? parseInt(process.env.DB_POOL_MAX || '20', 10); const rawClient = postgres(connectionString, { max: poolMax, @@ -174,9 +157,7 @@ function createDbClient(connectionString: string, maxConnections?: number): Crea }, }); - // Always hand back the raw postgres.js client. In embedded mode the pool is - // already pinned to 1 connection (above), which serializes queries at the - // connection level — so no JS-side serialization wrapper is needed. An earlier + // Hand back the raw postgres.js client directly. An earlier serialization // wrapper broke postgres.js fragment nesting (`sql`${query} AND …``) by // returning a Promise instead of a PendingQuery, which surfaced as // "syntax error at or near \"$1\"" from tools like manage_connections. 
@@ -238,12 +219,7 @@ export async function closeDbSingleton(): Promise { * Kysely dialect bound to the singleton postgres.js client. Used by better-auth * so that auth queries share the same connection pool as the rest of the app * instead of opening a second pg.Pool with its own (cold-prone) connections. - * - * Uses the raw (un-wrapped) postgres.js client because PostgresJSDialect calls - * sql.reserve() to acquire a dedicated connection — a code path the in-process - * pglite serialization wrapper doesn't proxy. This matches the pre-refactor - * behavior where better-auth ran on its own pg.Pool entirely outside the - * wrapper, so pglite tests that exercise auth retain their existing semantics. + * PostgresJSDialect calls sql.reserve() to acquire a dedicated connection. */ export function getAuthDialect(): PostgresJSDialect { ensureSingleton(); diff --git a/packages/server/src/dev-vite.ts b/packages/server/src/dev-vite.ts index 112ec70a7..2156d3e8a 100644 --- a/packages/server/src/dev-vite.ts +++ b/packages/server/src/dev-vite.ts @@ -1,8 +1,8 @@ /** * Shared Vite dev-server middleware wiring. * - * Both server entry points use this in development: `server.ts` (external - * Postgres) and `start-local.ts` (embedded PGlite). It attaches a Vite dev + * `server.ts` uses this in development for both backends (external Postgres + * and embedded Postgres). It attaches a Vite dev * server in middleware mode to the given HTTP server so the SPA is served with * HMR, and falls unmatched requests through to the Hono listener. */ diff --git a/packages/server/src/embedded-runtime.ts b/packages/server/src/embedded-runtime.ts new file mode 100644 index 000000000..5fd51e96d --- /dev/null +++ b/packages/server/src/embedded-runtime.ts @@ -0,0 +1,297 @@ +/** + * Embedded-PostgreSQL runtime — lazy-loaded by `server.ts` ONLY when + * `DATABASE_URL` is a path / `file://` (local `lobu run`, the Mac app, tests). 
+ * + * Everything heavy (the `embedded-postgres` binary, the pgvector injector) is + * pulled in via `await import(...)` inside `startEmbeddedRuntime`, so the + * external-Postgres path (prod) never resolves or loads them even though they + * sit in node_modules. Returns the mode-specific lifecycle hooks that + * `server.ts` hands to the shared `createServerLifecycle()` spine. + */ + +import { fork } from "node:child_process"; +import { existsSync } from "node:fs"; +import http from "node:http"; +import { createRequire } from "node:module"; +import { homedir } from "node:os"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { ensureDefaultAgent } from "./auth/default-provisioning"; +import { ensureInstallOperator } from "./auth/install-operator"; +import { + listMigrationFiles, + loadMigrationUpSection, +} from "./db/migration-loader"; +import logger from "./utils/logger"; + +const APP_ROOT = join(fileURLToPath(new URL(".", import.meta.url)), ".."); +const require = createRequire(import.meta.url); + +export interface EmbeddedRuntime { + /** TCP URL of the spawned cluster; already written to process.env.DATABASE_URL. */ + databaseUrl: string; + /** Cluster datadir, for the boot log. */ + dataDir: string; + databaseReadiness: () => Promise<void>; + preListenHooks: Array<() => Promise<void> | void>; + extraTeardown: Array<() => Promise<void> | void>; +} + +/** + * Resolve the embedded data root from `DATABASE_URL` (a `file://` / path value; + * the CLI / Mac app inject it). The cluster lives at `<dataRoot>/.lobu/pgdata`. + * A leading `~` is expanded. `DATABASE_URL` is the single source of truth — a + * postgres:// URL routes to the external path before this is ever called. + */ +function resolveDataRoot(): string { + const dbUrl = process.env.DATABASE_URL?.trim(); + if (!dbUrl) { + throw new Error( + "DATABASE_URL is required: a file:// path for embedded Postgres " + + "(e.g.
file://~/.lobu) or a postgres:// URL for an external database.", + ); + } + let p = dbUrl.replace(/^file:(\/\/)?/i, ""); + if (p === "~" || p.startsWith("~/")) p = join(homedir(), p.slice(1)); + return p; +} + +function resolveExistingPath( + ...candidates: Array +): string | null { + for (const candidate of candidates) { + if (candidate && existsSync(candidate)) return candidate; + } + return null; +} + +function findFreePort(): Promise { + return new Promise((resolve, reject) => { + const srv = http.createServer(); + srv.listen(0, "127.0.0.1", () => { + const addr = srv.address(); + const port = typeof addr === "object" && addr ? addr.port : 0; + srv.close(() => resolve(port)); + }); + srv.on("error", reject); + }); +} + +/** + * Spawn an embedded PostgreSQL (injecting pgvector), set process.env.DATABASE_URL + * to its TCP URL, fork the embeddings child, and return the lifecycle hooks. + */ +export async function startEmbeddedRuntime(): Promise { + const dataRoot = resolveDataRoot(); + const pgDataDir = join(dataRoot, ".lobu", "pgdata"); + + // Embedded-only env defaults. Single-user mode: the embedded runner spawns + // its own DB, seeds one bootstrap user, and is used by exactly one operator + // on one machine — block extra /sign-up forks unless LOBU_SINGLE_USER=0. + process.env.PGSSLMODE = "disable"; + if (process.env.LOBU_SINGLE_USER === undefined) { + process.env.LOBU_SINGLE_USER = "1"; + } + + // Heavy deps stay behind dynamic import so the external/prod path never + // loads the embedded-postgres binary resolution or the pgvector injector. + const { default: EmbeddedPostgres } = await import("embedded-postgres"); + const { injectPgvector, resolveEmbeddedNativeDir } = await import( + "@lobu/pgvector-embedded" + ); + + // embedded-postgres bundles pg_trgm but not pgvector — inject the host + // platform's prebuilt vector library into the binary tree before boot + // (idempotent). cube + earthdistance are already in the stock binary. 
+ injectPgvector(resolveEmbeddedNativeDir()); + + const pgPort = + parseInt(process.env.LOBU_PG_PORT || "", 10) || (await findFreePort()); + const pg = new EmbeddedPostgres({ + databaseDir: pgDataDir, + user: "postgres", + password: "postgres", + port: pgPort, + persistent: true, + }); + + // initdb refuses a non-empty datadir; skip it when the cluster already + // exists so restarts reuse the same data instead of erroring. + if (!existsSync(join(pgDataDir, "PG_VERSION"))) { + logger.info({ pgDataDir }, "Initialising embedded PostgreSQL cluster"); + await pg.initialise(); + } + await pg.start(); + + const databaseUrl = `postgresql://postgres:postgres@127.0.0.1:${pgPort}/postgres?sslmode=disable`; + process.env.DATABASE_URL = databaseUrl; + logger.info({ port: pgPort }, "Embedded PostgreSQL ready"); + + const embeddingsChild = await startEmbeddings(); + + return { + databaseUrl, + dataDir: pgDataDir, + databaseReadiness: () => runMigrations(databaseUrl), + preListenHooks: [ + // BEFORE listen so headless installs (CI, containers) sign in via + // better-auth without a chicken-and-egg /sign-up. Provisions the + // synthetic `install_operator` user; idempotent. Never crash boot. + async () => { + try { + await ensureInstallOperator(); + } catch (err) { + logger.error({ err }, "Install-operator provisioning failed"); + } + }, + // Default-agent provisioning: resolve the personal org id each boot so + // a returning user picks up the default agent. + async () => { + try { + const rows = (await import("postgres")) + .default(databaseUrl, { max: 1 }); + try { + const orgs = (await rows` + SELECT id FROM "organization" + WHERE (metadata::jsonb)->>'personal_org_for_user_id' IS NOT NULL + ORDER BY "createdAt" ASC LIMIT 1 + `) as unknown as Array<{ id: string }>; + const orgId = orgs[0]?.id ?? 
null; + if (orgId) await ensureDefaultAgent(orgId); + } finally { + await rows.end({ timeout: 1 }); + } + } catch (err) { + logger.warn({ err }, "Default-agent provisioning failed"); + } + }, + ], + // Runs after stopLobuGateway + closeDbSingleton so gateway connections + // release before the embeddings child + PG child are stopped. + extraTeardown: [ + () => { + embeddingsChild?.kill(); + }, + () => pg.stop(), + ], + }; +} + +async function runMigrations(databaseUrl: string): Promise { + // Same migrations dbmate uses for prod, applied unconditionally. The dir is + // a single squashed baseline + forward deltas; both replay idempotently + // (baseline gated by the schema_migrations ledger, deltas use IF NOT EXISTS). + const pg = await import("postgres"); + const sql = pg.default(databaseUrl, { max: 1 }); + + try { + const migrationsDir = resolveExistingPath( + // Published @lobu/cli copies migrations next to the bundle under dist/db/migrations. + join(fileURLToPath(new URL(".", import.meta.url)), "db", "migrations"), + join(APP_ROOT, "db", "migrations"), + join(APP_ROOT, "..", "..", "db", "migrations"), + join(process.cwd(), "db", "migrations"), + join(process.cwd(), "..", "..", "db", "migrations"), + ); + if (!migrationsDir) throw new Error("Migrations directory not found."); + + await sql.unsafe(` + CREATE TABLE IF NOT EXISTS public.schema_migrations ( + version character varying(128) NOT NULL PRIMARY KEY + ) + `); + + const appliedRows = (await sql.unsafe( + `SELECT version FROM public.schema_migrations`, + )) as Array<{ version: string }>; + const applied = new Set(appliedRows.map((r) => r.version)); + + // The squashed baseline uses plain CREATE TABLE/FUNCTION, so replaying it + // against an already-migrated DB raises duplicate-object SQLSTATEs; treat + // those as the no-op success case for the baseline only. Forward deltas + // must use IF NOT EXISTS rather than relying on this fallback. 
+ const IDEMPOTENT_BASELINE_VERSIONS = new Set(["00000000000000"]); + + logger.info("Running migrations..."); + for (const file of listMigrationFiles(migrationsDir)) { + const version = file.split("_")[0] ?? ""; + if (applied.has(version)) continue; + const migrationSql = loadMigrationUpSection(migrationsDir, file); + if (!migrationSql) continue; + + await sql.unsafe("SET search_path TO public"); + try { + await sql.unsafe(migrationSql); + } catch (err) { + const code = (err as { code?: string } | null)?.code; + const isDuplicateObject = + code === "42723" || code === "42P07" || code === "42710"; + if (!isDuplicateObject || !IDEMPOTENT_BASELINE_VERSIONS.has(version)) { + throw err; + } + logger.info( + { migration: file, version, pgErrorCode: code }, + "Migration already applied (idempotent skip)", + ); + } + await sql` + INSERT INTO public.schema_migrations (version) VALUES (${version}) + ON CONFLICT DO NOTHING + `; + } + logger.info("Migrations complete"); + } finally { + await sql.end(); + } +} + +async function startEmbeddings(): Promise<ReturnType<typeof fork> | null> { + const embeddingsPort = parseInt(process.env.EMBEDDINGS_PORT || "0", 10); + const publishedServerPath = (() => { + try { + return fileURLToPath(import.meta.resolve("@lobu/embeddings/server")); + } catch { + return null; + } + })(); + const serverPath = resolveExistingPath( + join(APP_ROOT, "packages", "embeddings", "src", "server.ts"), + join(process.cwd(), "packages", "embeddings", "src", "server.ts"), + ...(publishedServerPath ?
[publishedServerPath] : []), + ); + if (!serverPath) { + logger.warn( + "Embeddings service not found — embedding generation will not be available", + ); + return null; + } + + const port = embeddingsPort || (await findFreePort()); + let execArgv: string[] = []; + if (serverPath.endsWith(".ts")) { + const tsxPackageJson = require.resolve("tsx/package.json"); + execArgv = ["--import", join(dirname(tsxPackageJson), "dist", "loader.mjs")]; + } + + const child = fork(serverPath, [], { + execArgv, + env: { ...process.env, PORT: String(port) }, + stdio: ["ignore", "pipe", "pipe", "ipc"], + }); + process.env.EMBEDDINGS_SERVICE_URL = `http://127.0.0.1:${port}`; + + child.stdout?.on("data", (data: Buffer) => { + const msg = data.toString().trim(); + if (msg) logger.info({ service: "embeddings" }, msg); + }); + child.stderr?.on("data", (data: Buffer) => { + const msg = data.toString().trim(); + if (msg) logger.warn({ service: "embeddings" }, msg); + }); + child.on("exit", (code) => { + if (code !== 0 && code !== null) { + logger.warn({ code }, "Embeddings service exited"); + } + }); + return child; +} diff --git a/packages/server/src/gateway/__tests__/agent-history-routes.test.ts b/packages/server/src/gateway/__tests__/agent-history-routes.test.ts index 5ffc014a6..7c2086e36 100644 --- a/packages/server/src/gateway/__tests__/agent-history-routes.test.ts +++ b/packages/server/src/gateway/__tests__/agent-history-routes.test.ts @@ -14,7 +14,7 @@ import { UserAgentsStore } from "../auth/user-agents-store.js"; import { createAgentHistoryRoutes } from "../routes/public/agent-history.js"; import { setAuthProvider } from "../routes/public/settings-auth.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; @@ -26,7 +26,7 @@ describe("agent history routes", () => { let userAgentsStore: UserAgentsStore; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); 
beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/agent-routes.test.ts b/packages/server/src/gateway/__tests__/agent-routes.test.ts index 22055b48d..704b6296f 100644 --- a/packages/server/src/gateway/__tests__/agent-routes.test.ts +++ b/packages/server/src/gateway/__tests__/agent-routes.test.ts @@ -7,7 +7,7 @@ import { UserAgentsStore } from "../auth/user-agents-store.js"; import { createAgentRoutes } from "../routes/public/agents.js"; import { setAuthProvider } from "../routes/public/settings-auth.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; @@ -20,7 +20,7 @@ describe("agent routes", () => { let userAgentsStore: UserAgentsStore; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/agent-settings-store.test.ts b/packages/server/src/gateway/__tests__/agent-settings-store.test.ts index 8d2352a67..8563ecab7 100644 --- a/packages/server/src/gateway/__tests__/agent-settings-store.test.ts +++ b/packages/server/src/gateway/__tests__/agent-settings-store.test.ts @@ -3,7 +3,7 @@ import { createPostgresAgentConfigStore } from "../../lobu/stores/postgres-store import { orgContext } from "../../lobu/stores/org-context.js"; import { AgentSettingsStore } from "../auth/settings/agent-settings-store.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; @@ -14,7 +14,7 @@ describe("AgentSettingsStore", () => { let store: AgentSettingsStore; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/agent-transcript-snapshot.test.ts b/packages/server/src/gateway/__tests__/agent-transcript-snapshot.test.ts index 0fd1e2221..d4f188643 100644 --- 
a/packages/server/src/gateway/__tests__/agent-transcript-snapshot.test.ts +++ b/packages/server/src/gateway/__tests__/agent-transcript-snapshot.test.ts @@ -1,17 +1,11 @@ /** * Integration tests for the per-run agent_transcript_snapshot path. * - * Backed by the ephemeral PGlite gateway harness (`ensurePgliteForGatewayTests`). + * Backed by the embedded Postgres gateway harness (`ensureDbForGatewayTests`). * Covers the gateway-side surface: HTTP snapshot routes, advisory lock, * /agent-history fallback resolver, and schema constraints. The worker-side * helpers (hydrate / writeSnapshot) are tested in * `packages/agent-worker/src/openclaw/__tests__/transcript-snapshot.test.ts`. - * - * Test-isolation note: PGlite pins postgres.js to a single connection. The - * cross-pod advisory lock cannot be exercised end-to-end here (the second - * acquire would block forever on the same connection); the embedded-mode - * no-op path is asserted instead, and the genuine cross-pod race is covered - * by the dual-psql repro in the PR body. 
*/ import { @@ -27,20 +21,19 @@ import { Hono } from "hono"; import { getDb } from "../../db/client.js"; import { UserAgentsStore } from "../auth/user-agents-store.js"; import { createTranscriptRoutes } from "../gateway/transcript-routes.js"; -import { acquireConversationLock } from "../orchestration/impl/embedded-deployment.js"; import { createAgentHistoryRoutes, readLatestSnapshotJsonl, } from "../routes/public/agent-history.js"; import { setAuthProvider } from "../routes/public/settings-auth.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -705,49 +698,6 @@ describe("agent_transcript_snapshot — /agent-history fallback", () => { }); }); -describe("agent_transcript_snapshot — advisory lock helper", () => { - test.skipIf(process.env.LOBU_DISABLE_PREPARE !== "1")("lock-no-op-in-embedded-mode: PGlite-pinned pool returns sentinel without reserving", async () => { - // Embedded mode pins the postgres.js pool to a single connection; the - // real reserve()-based path would block forever. The helper detects - // LOBU_DISABLE_PREPARE=1 (set by ensurePgliteForGatewayTests) and - // returns a no-op release. The genuine cross-pod path is asserted in - // the PR body's dual-psql repro. Skipped against real Postgres - // (CI integration job) — the sentinel-mode assertion does not hold - // there since sequential acquires on the same key would block. - - const a = await acquireConversationLock("org_lock_a", "agent-x", "conv-x"); - expect(a).not.toBeNull(); - // No real lock held → second acquire on the same key also succeeds. 
- const b = await acquireConversationLock("org_lock_a", "agent-x", "conv-x"); - expect(b).not.toBeNull(); - await a!.release(); - await b!.release(); - }); - - test.skipIf(process.env.LOBU_DISABLE_PREPARE !== "1")("lock-cross-conv-parallelism (embedded sentinel): different (org,agent,conv) acquire independently", async () => { - // Asserts the helper's keying — even in embedded sentinel mode the - // call shape passes through and each acquire/release pairs cleanly. - // The real-PG path uses pg_try_advisory_lock(int32, int32) where each - // unique (org,agent,conv) hashes to a distinct key2. Skipped against - // real Postgres (CI integration job) — without the embedded sentinel - // shortcut, the cap+reserve path could collide with the lock counter - // state pre-set by other tests; the cross-pod parallelism property - // is tested at the lock keying layer (hashConvKey2) not here. - const a = await acquireConversationLock("org_x", "agent-x", "conv-A"); - const b = await acquireConversationLock("org_x", "agent-x", "conv-B"); - const c = await acquireConversationLock("org_x", "agent-y", "conv-A"); - const d = await acquireConversationLock("org_y", "agent-x", "conv-A"); - expect(a).not.toBeNull(); - expect(b).not.toBeNull(); - expect(c).not.toBeNull(); - expect(d).not.toBeNull(); - await a!.release(); - await b!.release(); - await c!.release(); - await d!.release(); - }); -}); - describe("agent_transcript_snapshot — schema", () => { test("terminal_status CHECK constraint accepts valid and rejects invalid", async () => { const orgId = await seedAgentRow("agent-schema", { diff --git a/packages/server/src/gateway/__tests__/base-deployment-grants.test.ts b/packages/server/src/gateway/__tests__/base-deployment-grants.test.ts index 98173a0a5..5a2435ccf 100644 --- a/packages/server/src/gateway/__tests__/base-deployment-grants.test.ts +++ b/packages/server/src/gateway/__tests__/base-deployment-grants.test.ts @@ -8,7 +8,7 @@ import { import { GrantStore } from 
"../permissions/grant-store.js"; import { PolicyStore } from "../permissions/policy-store.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; @@ -76,7 +76,7 @@ describe("BaseDeploymentManager.syncNetworkConfigGrants", () => { let manager: TestDeploymentManager; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/chat-instance-manager-boot.test.ts b/packages/server/src/gateway/__tests__/chat-instance-manager-boot.test.ts index bb97b403b..d563e7897 100644 --- a/packages/server/src/gateway/__tests__/chat-instance-manager-boot.test.ts +++ b/packages/server/src/gateway/__tests__/chat-instance-manager-boot.test.ts @@ -20,12 +20,12 @@ * throws (saveConnection calls getOrgId() strict) and the boot loop * crashes silently, masking the underlying failure. * - * Uses PGlite via the shared gateway test harness; no network. + * Uses the embedded Postgres gateway test harness; no network. 
*/ import { beforeAll, beforeEach, describe, expect, test } from "bun:test"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; @@ -34,7 +34,7 @@ const TEST_ENCRYPTION_KEY = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); process.env.ENCRYPTION_KEY = TEST_ENCRYPTION_KEY; }); diff --git a/packages/server/src/gateway/__tests__/chat-instance-manager-slack.test.ts b/packages/server/src/gateway/__tests__/chat-instance-manager-slack.test.ts index 0397a86fe..8fd0a6f5c 100644 --- a/packages/server/src/gateway/__tests__/chat-instance-manager-slack.test.ts +++ b/packages/server/src/gateway/__tests__/chat-instance-manager-slack.test.ts @@ -1,6 +1,6 @@ import { beforeAll, describe, expect, mock, test } from "bun:test"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; @@ -18,7 +18,7 @@ const TEST_ENCRYPTION_KEY = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); async function loadChatInstanceManager() { diff --git a/packages/server/src/gateway/__tests__/connection-routes.test.ts b/packages/server/src/gateway/__tests__/connection-routes.test.ts index 932dca608..a8ff6fd46 100644 --- a/packages/server/src/gateway/__tests__/connection-routes.test.ts +++ b/packages/server/src/gateway/__tests__/connection-routes.test.ts @@ -13,7 +13,7 @@ import { UserAgentsStore } from "../auth/user-agents-store.js"; import { createConnectionCrudRoutes } from "../routes/public/connections.js"; import { setAuthProvider } from "../routes/public/settings-auth.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; @@ -25,7 +25,7 
@@ describe("connection routes", () => { let userAgentsStore: UserAgentsStore; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/core-services-store-selection.test.ts b/packages/server/src/gateway/__tests__/core-services-store-selection.test.ts index a0aa84f28..0f2935b9b 100644 --- a/packages/server/src/gateway/__tests__/core-services-store-selection.test.ts +++ b/packages/server/src/gateway/__tests__/core-services-store-selection.test.ts @@ -10,7 +10,7 @@ import { import { InMemoryStateAdapter } from "./fixtures/in-memory-state-adapter.js"; import { ensureEncryptionKey, - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, } from "./helpers/db-setup.js"; import { MockMessageQueue } from "./setup.js"; @@ -120,7 +120,7 @@ class InMemoryWritableStore implements WritableSecretStore { } beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); afterEach(() => { diff --git a/packages/server/src/gateway/__tests__/grant-store.test.ts b/packages/server/src/gateway/__tests__/grant-store.test.ts index 0f7dbb68b..8faa6e73a 100644 --- a/packages/server/src/gateway/__tests__/grant-store.test.ts +++ b/packages/server/src/gateway/__tests__/grant-store.test.ts @@ -2,7 +2,7 @@ import { beforeAll, beforeEach, describe, expect, test } from "bun:test"; import { orgContext } from "../../lobu/stores/org-context.js"; import { GrantStore } from "../permissions/grant-store.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; @@ -23,7 +23,7 @@ describe("GrantStore (PG-backed)", () => { let store: GrantStore; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/helpers/db-setup.ts 
b/packages/server/src/gateway/__tests__/helpers/db-setup.ts index 703529e2b..da15a6885 100644 --- a/packages/server/src/gateway/__tests__/helpers/db-setup.ts +++ b/packages/server/src/gateway/__tests__/helpers/db-setup.ts @@ -2,9 +2,9 @@ * Bun:test PG harness shared across the gateway test suite. * * Store tests in this directory read/write Postgres directly, so callers - * need a real DB. We boot an ephemeral PGlite once per test process the - * first time `ensurePgliteForGatewayTests()` is called, run migrations, and - * reuse it for the rest of the suite. + * need a real DB. The first time `ensureDbForGatewayTests()` is called we use + * DATABASE_URL if set, else spawn an ephemeral embedded Postgres once per test + * process, run migrations, and reuse it for the rest of the suite. * * Tests that don't need PG (pure helpers, classification logic, etc.) can * skip calling this entirely and pay no cost. @@ -12,9 +12,9 @@ import { closeDbSingleton, getDb } from "../../../db/client.js"; import { - startPgliteBackend, - type PgliteBackend, -} from "../../../__tests__/setup/pglite-backend.js"; + type EmbeddedBackend, + startEmbeddedBackend, +} from "../../../__tests__/setup/embedded-postgres-backend.js"; import { cleanupTestDatabase, closeTestDb, @@ -22,22 +22,21 @@ import { } from "../../../__tests__/setup/test-db.js"; let initPromise: Promise | null = null; -let backend: PgliteBackend | null = null; +let backend: EmbeddedBackend | null = null; /** - * Idempotent. Starts PGlite + runs migrations on first call, returns the + * Idempotent. Starts the DB + runs migrations on first call, returns the * same Promise on every subsequent call. Tests should `await` it from a * `beforeAll` — repeated calls are cheap. 
*/ -export function ensurePgliteForGatewayTests(): Promise { +export function ensureDbForGatewayTests(): Promise { if (initPromise) return initPromise; initPromise = (async () => { if (!process.env.DATABASE_URL) { - backend = await startPgliteBackend(); + backend = await startEmbeddedBackend(); process.env.DATABASE_URL = backend.url; process.env.PGSSLMODE = "disable"; - process.env.LOBU_DISABLE_PREPARE = "1"; } if (!process.env.ENCRYPTION_KEY) { process.env.ENCRYPTION_KEY = diff --git a/packages/server/src/gateway/__tests__/instruction-service.test.ts b/packages/server/src/gateway/__tests__/instruction-service.test.ts index be99fb178..217683330 100644 --- a/packages/server/src/gateway/__tests__/instruction-service.test.ts +++ b/packages/server/src/gateway/__tests__/instruction-service.test.ts @@ -3,7 +3,7 @@ import { createPostgresAgentConfigStore } from "../../lobu/stores/postgres-store import { AgentSettingsStore } from "../auth/settings/agent-settings-store.js"; import { InstructionService } from "../services/instruction-service.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, } from "./helpers/db-setup.js"; @@ -12,7 +12,7 @@ describe("InstructionService", () => { let service: InstructionService; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/interaction-bridge-action-handlers.test.ts b/packages/server/src/gateway/__tests__/interaction-bridge-action-handlers.test.ts index ab80b9d10..254fd8925 100644 --- a/packages/server/src/gateway/__tests__/interaction-bridge-action-handlers.test.ts +++ b/packages/server/src/gateway/__tests__/interaction-bridge-action-handlers.test.ts @@ -2,7 +2,7 @@ import { beforeAll, beforeEach, describe, expect, mock, test } from "bun:test"; import { storePendingTool, type PendingToolInvocation } from "../auth/mcp/pending-tool-store.js"; import { registerActionHandlers 
} from "../connections/interaction-bridge.js"; import type { PlatformConnection } from "../connections/types.js"; -import { ensurePgliteForGatewayTests, resetTestDatabase } from "./helpers/db-setup.js"; +import { ensureDbForGatewayTests, resetTestDatabase } from "./helpers/db-setup.js"; type ActionHandler = (event: any) => Promise; @@ -89,7 +89,7 @@ async function seedPending(requestId: string): Promise { describe("registerActionHandlers — tool approval", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -252,7 +252,7 @@ describe("registerActionHandlers — tool approval", () => { describe("registerActionHandlers — question (no callback)", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -282,7 +282,7 @@ describe("registerActionHandlers — question (no callback)", () => { describe("registerActionHandlers — question (with onQuestionClick)", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -365,7 +365,7 @@ describe("registerActionHandlers — question (with onQuestionClick)", () => { describe("registerActionHandlers — guards", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); let h: Harness; diff --git a/packages/server/src/gateway/__tests__/interaction-bridge-slack-webhook.test.ts b/packages/server/src/gateway/__tests__/interaction-bridge-slack-webhook.test.ts index 5b8f4d97b..848dbd3e6 100644 --- a/packages/server/src/gateway/__tests__/interaction-bridge-slack-webhook.test.ts +++ b/packages/server/src/gateway/__tests__/interaction-bridge-slack-webhook.test.ts @@ -5,7 +5,7 @@ import { getDb } from "../../db/client.js"; import { storePendingTool, type PendingToolInvocation } from "../auth/mcp/pending-tool-store.js"; import { registerActionHandlers } 
from "../connections/interaction-bridge.js"; import type { PlatformConnection } from "../connections/types.js"; -import { ensurePgliteForGatewayTests, resetTestDatabase } from "./helpers/db-setup.js"; +import { ensureDbForGatewayTests, resetTestDatabase } from "./helpers/db-setup.js"; import { InMemoryStateAdapter } from "./fixtures/in-memory-state-adapter.js"; import { blockActionsPayload, @@ -88,7 +88,7 @@ async function waitFor( describe("Slack block_actions → registerActionHandlers (Tier B integration)", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/mcp-proxy-edge-cases.test.ts b/packages/server/src/gateway/__tests__/mcp-proxy-edge-cases.test.ts index 75e77580e..e53ca4e6b 100644 --- a/packages/server/src/gateway/__tests__/mcp-proxy-edge-cases.test.ts +++ b/packages/server/src/gateway/__tests__/mcp-proxy-edge-cases.test.ts @@ -120,10 +120,10 @@ let agent1Token: string; let agent2Token: string; beforeAll(async () => { - const { ensurePgliteForGatewayTests, seedAgentRow } = await import( + const { ensureDbForGatewayTests, seedAgentRow } = await import( "./helpers/db-setup.js" ); - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); await seedAgentRow("agent1"); await seedAgentRow("agent2"); diff --git a/packages/server/src/gateway/__tests__/mcp-proxy.test.ts b/packages/server/src/gateway/__tests__/mcp-proxy.test.ts index 8ed39da5b..2a957b15b 100644 --- a/packages/server/src/gateway/__tests__/mcp-proxy.test.ts +++ b/packages/server/src/gateway/__tests__/mcp-proxy.test.ts @@ -101,13 +101,13 @@ let validToken: string; let originalFetch: typeof fetch; beforeAll(async () => { - // GrantStore is now PG-backed; bring up an ephemeral PGlite for the + // GrantStore is now PG-backed; bring up an ephemeral embedded Postgres for the // tool-approval tests below. 
Seed `agent1` so the grants FK accepts // inserts keyed on it. - const { ensurePgliteForGatewayTests, seedAgentRow } = await import( + const { ensureDbForGatewayTests, seedAgentRow } = await import( "./helpers/db-setup.js" ); - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); await seedAgentRow("agent1"); originalEnv = process.env.ENCRYPTION_KEY; process.env.ENCRYPTION_KEY = TEST_ENCRYPTION_KEY; diff --git a/packages/server/src/gateway/__tests__/multi-tenant-isolation-reproducers.test.ts b/packages/server/src/gateway/__tests__/multi-tenant-isolation-reproducers.test.ts index 24d8826f1..9079ef44a 100644 --- a/packages/server/src/gateway/__tests__/multi-tenant-isolation-reproducers.test.ts +++ b/packages/server/src/gateway/__tests__/multi-tenant-isolation-reproducers.test.ts @@ -39,7 +39,7 @@ import { } from "../proxy/secret-proxy.js"; import type { SecretStore } from "../secrets/index.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; @@ -293,7 +293,7 @@ describe("[finding 1] lookupPlaceholderMapping enforces caller's expected org", describe("[finding 2] GrantStore queries scope to caller's organization id", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -474,7 +474,7 @@ describe("[finding 2] GrantStore queries scope to caller's organization id", () describe("[finding 4] ChatInstanceManager.initialize refuses Telegram polling rows in cloud", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/oauth-state-store.test.ts b/packages/server/src/gateway/__tests__/oauth-state-store.test.ts index bcb3031f0..b4c613763 100644 --- a/packages/server/src/gateway/__tests__/oauth-state-store.test.ts +++ 
b/packages/server/src/gateway/__tests__/oauth-state-store.test.ts @@ -6,13 +6,13 @@ import { sweepExpiredOAuthStates, } from "../auth/oauth/state-store.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, } from "./helpers/db-setup.js"; describe("OAuthStateStore (Postgres-backed)", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/pending-interaction-cleanup.test.ts b/packages/server/src/gateway/__tests__/pending-interaction-cleanup.test.ts index 831e4c5e6..465884cf6 100644 --- a/packages/server/src/gateway/__tests__/pending-interaction-cleanup.test.ts +++ b/packages/server/src/gateway/__tests__/pending-interaction-cleanup.test.ts @@ -25,7 +25,7 @@ import { } from "../connections/pending-interaction-store.js"; import { InteractionService, type PostedQuestion } from "../interactions.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, } from "./helpers/db-setup.js"; @@ -57,7 +57,7 @@ function buildQuestion(id: string, userId = USER_A): PostedQuestion { describe("pending-interaction-store cleanup paths", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { await resetTestDatabase(); diff --git a/packages/server/src/gateway/__tests__/pending-interaction-store.test.ts b/packages/server/src/gateway/__tests__/pending-interaction-store.test.ts index 6a01550a5..e07639656 100644 --- a/packages/server/src/gateway/__tests__/pending-interaction-store.test.ts +++ b/packages/server/src/gateway/__tests__/pending-interaction-store.test.ts @@ -17,7 +17,7 @@ */ import { beforeAll, beforeEach, describe, expect, test } from "bun:test"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, } from "./helpers/db-setup.js"; import { getDb } from "../../db/client.js"; 
@@ -59,7 +59,7 @@ function buildQuestion(id: string, userId = USER_A): PostedQuestion { describe("pending-interaction-store", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { await resetTestDatabase(); diff --git a/packages/server/src/gateway/__tests__/reserve-cap.test.ts b/packages/server/src/gateway/__tests__/reserve-cap.test.ts index c7dfc8e16..d1b2c981e 100644 --- a/packages/server/src/gateway/__tests__/reserve-cap.test.ts +++ b/packages/server/src/gateway/__tests__/reserve-cap.test.ts @@ -7,7 +7,7 @@ * gateway is to exhausting the postgres-js pool with per-conversation * reservations. * - * Validated against PGlite via the gateway test harness. + * Validated against the embedded Postgres gateway test harness. */ import { @@ -25,12 +25,12 @@ import { setReservedLockCountForTests, } from "../orchestration/impl/embedded-deployment.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, } from "./helpers/db-setup.js"; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -45,54 +45,13 @@ afterEach(() => { }); describe("acquireConversationLock: reserved-connection cap and metric", () => { - /** - * The full lock path uses `sql.reserve()`, which under PGlite would block - * because the embedded pool is pinned to a single connection. Instead we - * exercise the cap with `LOBU_DISABLE_PREPARE=1` (which is already set by - * the gateway harness) so `acquireConversationLock` returns the - * embedded-mode no-op sentinel without touching the counter — and then - * directly drive the counter via a sibling code path that talks to the - * cap. The cap and counter still need to work outside the embedded - * shortcut, so we temporarily clear LOBU_DISABLE_PREPARE for these tests - * and assert the cap rejection before any `sql.reserve()` runs. 
- * - * Concretely: set the cap to 2, override the env to take the non-embedded - * branch, but stub out the reserve so we don't actually attach a real - * connection. We do this by setting the cap to 0 — which forces an - * immediate `null` return — and asserting the metric stays at 0. - */ + // The cap check runs BEFORE any `sql.reserve()`, so these tests stage the + // in-process counter directly and assert the cap-rejection branch returns + // `null` without attaching a real connection — backend-agnostic. test("cap exhaustion returns null and does not increment the counter", async () => { - const prevDisable = process.env.LOBU_DISABLE_PREPARE; - delete process.env.LOBU_DISABLE_PREPARE; process.env.LOBU_MAX_RESERVED_LOCKS = "0"; - try { - const lock = await acquireConversationLock( - "org-a", - "agent-a", - "conv-a" - ); - expect(lock).toBeNull(); - expect(getReservedLockCount()).toBe(0); - } finally { - if (prevDisable !== undefined) { - process.env.LOBU_DISABLE_PREPARE = prevDisable; - } - } - }); - - test("embedded mode returns a no-op sentinel without touching the counter", async () => { - // Only meaningful under PGlite (`LOBU_DISABLE_PREPARE=1`). Real-PG CI - // runs this same suite against a postgres container without the - // embedded mode signal, in which case `acquireConversationLock` falls - // through to the cap+reserve path and the assertions below don't - // apply. - if (process.env.LOBU_DISABLE_PREPARE !== "1") { - return; - } const lock = await acquireConversationLock("org-a", "agent-a", "conv-a"); - expect(lock).not.toBeNull(); - expect(getReservedLockCount()).toBe(0); - await lock!.release(); + expect(lock).toBeNull(); expect(getReservedLockCount()).toBe(0); }); @@ -101,13 +60,6 @@ describe("acquireConversationLock: reserved-connection cap and metric", () => { }); test("cap rejects when counter has been staged at or above cap", async () => { - // PGlite pins us to a single connection, so we can't drive `sql.reserve()` - // end-to-end. 
Stage the counter directly to prove the cap branch - // rejects when the count already sits at the cap — the production code - // path increments the counter from the same place and observes the - // same check. - const prevDisable = process.env.LOBU_DISABLE_PREPARE; - delete process.env.LOBU_DISABLE_PREPARE; process.env.LOBU_MAX_RESERVED_LOCKS = "2"; try { setReservedLockCountForTests(2); @@ -116,13 +68,9 @@ describe("acquireConversationLock: reserved-connection cap and metric", () => { // Counter unchanged — the cap check returned before the increment. expect(getReservedLockCount()).toBe(2); - // Staging the counter back below the cap "frees a slot"; the next - // call should no longer hit the cap rejection. We can't observe the - // post-reserve success path under PGlite without blocking, but we - // can confirm `null` is no longer returned at the cap check — by - // dropping to 1 and re-bumping cap to 1 so the next call falls back - // to the same null path. (One-off matrix instead of chasing real - // reserve().) + // Drop below the cap and re-bump the cap to 1 so the next call still + // hits the cap-rejection branch (counter == cap) — confirms the check + // tracks the counter without needing a real reserve(). 
setReservedLockCountForTests(1); process.env.LOBU_MAX_RESERVED_LOCKS = "1"; const stillRejected = await acquireConversationLock( @@ -134,9 +82,6 @@ describe("acquireConversationLock: reserved-connection cap and metric", () => { expect(getReservedLockCount()).toBe(1); } finally { setReservedLockCountForTests(0); - if (prevDisable !== undefined) { - process.env.LOBU_DISABLE_PREPARE = prevDisable; - } } }); }); diff --git a/packages/server/src/gateway/__tests__/rest-api-hardening.test.ts b/packages/server/src/gateway/__tests__/rest-api-hardening.test.ts index a14d98a38..77ce74776 100644 --- a/packages/server/src/gateway/__tests__/rest-api-hardening.test.ts +++ b/packages/server/src/gateway/__tests__/rest-api-hardening.test.ts @@ -39,7 +39,7 @@ import { createAgentRoutes } from "../routes/public/agents.js"; import { createSlackRoutes } from "../routes/public/slack.js"; import { setAuthProvider } from "../routes/public/settings-auth.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; @@ -84,7 +84,7 @@ describe("auth: missing and expired sessions", () => { let userAgentsStore: UserAgentsStore; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -239,7 +239,7 @@ describe("cross-org isolation: agents cannot leak across organizations", () => { let agentMetadataStoreB: AgentMetadataStore; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -364,7 +364,7 @@ describe("agent CRUD: access control and input validation", () => { let userAgentsStore: UserAgentsStore; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -591,7 +591,7 @@ describe("connection routes: access control", () => { let userAgentsStore: UserAgentsStore; beforeAll(async () => { - await 
ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -741,7 +741,7 @@ describe("slack routes: OAuth callback and replay protection", () => { let sessionOrgId: string | null; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -990,7 +990,7 @@ describe("input validation: agentId format and edge cases", () => { let userAgentsStore: UserAgentsStore; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/revoked-token-store.test.ts b/packages/server/src/gateway/__tests__/revoked-token-store.test.ts index 685036bf4..d74e68371 100644 --- a/packages/server/src/gateway/__tests__/revoked-token-store.test.ts +++ b/packages/server/src/gateway/__tests__/revoked-token-store.test.ts @@ -13,7 +13,7 @@ import { } from "../routes/public/settings-auth.js"; import { ensureEncryptionKey, - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, } from "./helpers/db-setup.js"; @@ -21,7 +21,7 @@ describe("RevokedTokenStore (PG-backed)", () => { let store: RevokedTokenStore; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -64,7 +64,7 @@ describe("RevokedTokenStore (PG-backed)", () => { describe("createApiAuthMiddleware — worker token revocation", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -128,7 +128,7 @@ describe("verifySettingsSession — jti revocation", () => { } beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -206,7 +206,7 @@ describe("verifySettingsSession — jti revocation", () => { // the singleton store; this test pins that contract. 
describe("authenticateWorker (internal middleware) — revocation reach", () => { beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/runs-queue-integration.test.ts b/packages/server/src/gateway/__tests__/runs-queue-integration.test.ts index d6f1c75f5..21c5c7aff 100644 --- a/packages/server/src/gateway/__tests__/runs-queue-integration.test.ts +++ b/packages/server/src/gateway/__tests__/runs-queue-integration.test.ts @@ -1,15 +1,13 @@ /** - * Integration tests for RunsQueue against a real Postgres (PGlite in CI). + * Integration tests for RunsQueue against a real Postgres. * * Covers the production behaviors that unit-level mocking cannot exercise — * SKIP LOCKED concurrency, graceful shutdown release, priority + expires_at + * retryDelay options, startup recovery scan. * - * PGlite is a single-process WASM Postgres so the SKIP LOCKED concurrency - * test cannot exercise real cross-process contention. We assert the - * single-process behavior is correct; the production guarantee (FOR UPDATE - * SKIP LOCKED is row-locked at the heap-tuple level) is unchanged because - * the SQL is the same. + * The SKIP LOCKED concurrency test drives multiple pooled connections against + * the real embedded Postgres; the production guarantee (FOR UPDATE SKIP LOCKED + * is row-locked at the heap-tuple level) holds because the SQL is identical. 
*/ import { @@ -24,14 +22,14 @@ import { import { RunsQueue } from "../infrastructure/queue/runs-queue.js"; import { getDb } from "../../db/client.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, } from "./helpers/db-setup.js"; let queue: RunsQueue | null = null; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { @@ -48,7 +46,7 @@ afterEach(async () => { }); afterAll(async () => { - // No global teardown — db-setup.ts owns the PGlite lifecycle. + // No global teardown — db-setup.ts owns the embedded Postgres lifecycle. }); describe("RunsQueue — SKIP LOCKED claim concurrency", () => { diff --git a/packages/server/src/gateway/__tests__/slack-routes.test.ts b/packages/server/src/gateway/__tests__/slack-routes.test.ts index 1efd85053..07789c1e3 100644 --- a/packages/server/src/gateway/__tests__/slack-routes.test.ts +++ b/packages/server/src/gateway/__tests__/slack-routes.test.ts @@ -2,7 +2,7 @@ import { afterEach, beforeAll, beforeEach, describe, expect, mock, test } from " import { Hono } from "hono"; import { getDb } from "../../db/client.js"; import { createSlackRoutes } from "../routes/public/slack.js"; -import { ensurePgliteForGatewayTests, resetTestDatabase } from "./helpers/db-setup.js"; +import { ensureDbForGatewayTests, resetTestDatabase } from "./helpers/db-setup.js"; describe("slack routes", () => { const originalClientId = process.env.SLACK_CLIENT_ID; @@ -19,7 +19,7 @@ describe("slack routes", () => { let sessionOrgId: string | null; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/smoke-dispatch.test.ts b/packages/server/src/gateway/__tests__/smoke-dispatch.test.ts index 9228970c5..42eaa80c5 100644 --- a/packages/server/src/gateway/__tests__/smoke-dispatch.test.ts +++ 
b/packages/server/src/gateway/__tests__/smoke-dispatch.test.ts @@ -28,7 +28,7 @@ import { Hono } from "hono"; import { getDb } from "../../db/client.js"; import { createSmokeRoutes } from "../routes/internal/smoke.js"; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, } from "./helpers/db-setup.js"; @@ -54,7 +54,7 @@ function restoreEnv() { } beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/__tests__/user-auth-profile-store.test.ts b/packages/server/src/gateway/__tests__/user-auth-profile-store.test.ts index 827d12126..8e14a8656 100644 --- a/packages/server/src/gateway/__tests__/user-auth-profile-store.test.ts +++ b/packages/server/src/gateway/__tests__/user-auth-profile-store.test.ts @@ -3,7 +3,7 @@ import { PostgresSecretStore } from "../../lobu/stores/postgres-secret-store.js" import { UserAuthProfileStore } from "../auth/settings/user-auth-profile-store.js"; import { ensureEncryptionKey, - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, seedAgentRow, } from "./helpers/db-setup.js"; @@ -12,7 +12,7 @@ let secretStore: PostgresSecretStore; let store: UserAuthProfileStore; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); beforeEach(async () => { diff --git a/packages/server/src/gateway/infrastructure/queue/runs-queue.ts b/packages/server/src/gateway/infrastructure/queue/runs-queue.ts index 57b5b7445..d4b8a1f2b 100644 --- a/packages/server/src/gateway/infrastructure/queue/runs-queue.ts +++ b/packages/server/src/gateway/infrastructure/queue/runs-queue.ts @@ -812,11 +812,8 @@ export class RunsQueue implements IMessageQueue { this.staleSweepInFlight = true; try { const sql = getDb(); - // Threshold is a hard-coded constant; inline as a SQL literal so this - // query has zero placeholders. 
Tagged-template parameter interpolation - // here is unnecessary and trips a PGlite quirk where parameterized - // RETURNING queries occasionally surface as "supplies N parameters but - // statement requires 0" under embedded-compat (prepare:false). + // Threshold is a hard-coded constant, so inline it as a SQL literal — + // no placeholders needed. const thresholdMs = CLAIM_VISIBILITY_TIMEOUT_MS; const result = await sql.unsafe( `UPDATE public.runs diff --git a/packages/server/src/gateway/orchestration/impl/embedded-deployment.ts b/packages/server/src/gateway/orchestration/impl/embedded-deployment.ts index 3a86a059a..eb1c03b16 100644 --- a/packages/server/src/gateway/orchestration/impl/embedded-deployment.ts +++ b/packages/server/src/gateway/orchestration/impl/embedded-deployment.ts @@ -206,8 +206,7 @@ export function resetReservedLockCountForTests(): void { * Force the internal counter to a specific value. Test-only — production * code MUST go through `acquireConversationLock` so increment+decrement * pair via the canonical path. Used by the cap-enforcement test which - * needs to stage the counter without actually consuming PG connections - * (PGlite pins us to a single shared connection). + * needs to stage the counter without actually consuming PG connections. */ export function setReservedLockCountForTests(value: number): void { reservedLockCount = Math.max(0, value); @@ -226,25 +225,17 @@ export function setReservedLockCountForTests(value: number): void { * steal the conversation mid-run. The `sql.reserve()` connection is * dedicated and lock state survives until we explicitly release. * - * No-op in embedded mode (`LOBU_DISABLE_PREPARE=1`). Embedded mode pins the - * pg pool to a single connection (see `createDbClient` in db/client.ts), so - * `reserve()` would block any sibling query forever. 
Embedded also can't - * have multi-pod races by definition — the in-process `workers` Map (see - * `spawnDeployment` above) already gates per-conversation concurrency. - * Returning a no-op release lets the caller treat all modes uniformly. + * The local embedded backend takes this same real path now that it runs on a + * real multi-connection Postgres (no single-connection pin). In a single + * process the lock is uncontended and the in-process `workers` Map (see + * `spawnDeployment` above) is the primary per-conversation gate; the advisory + * lock is the cross-pod gate that matters in clustered deployments. */ export async function acquireConversationLock( organizationId: string, agentId: string, conversationId: string ): Promise<{ release: () => Promise } | null> { - if (process.env.LOBU_DISABLE_PREPARE === "1") { - // Embedded mode: in-process Map is the sole gate. Return a sentinel so - // the caller's wiring (entry.releaseConvLock, exit handler, etc.) - // stays uniform. - return { release: async () => {} }; - } - // Hard cap on reserved connections held across all live workers. 
Each lock // pins one postgres-js pool slot for the worker's lifetime; without a cap // multi-pod × multi-conversation pressure exhausts the pool and stalls diff --git a/packages/server/src/lobu/__tests__/agent-routes-apply.test.ts b/packages/server/src/lobu/__tests__/agent-routes-apply.test.ts index a951c8694..b4c65bb15 100644 --- a/packages/server/src/lobu/__tests__/agent-routes-apply.test.ts +++ b/packages/server/src/lobu/__tests__/agent-routes-apply.test.ts @@ -13,7 +13,7 @@ import { beforeAll, beforeEach, describe, expect, mock, test } from 'bun:test'; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, } from '../../gateway/__tests__/helpers/db-setup.js'; @@ -143,7 +143,7 @@ const ORG_A = 'org-a'; const ORG_B = 'org-b'; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); }); async function importAgentRoutes() { diff --git a/packages/server/src/lobu/stores/__tests__/postgres-secret-store.test.ts b/packages/server/src/lobu/stores/__tests__/postgres-secret-store.test.ts index c8a3d840d..be5993e8c 100644 --- a/packages/server/src/lobu/stores/__tests__/postgres-secret-store.test.ts +++ b/packages/server/src/lobu/stores/__tests__/postgres-secret-store.test.ts @@ -1,6 +1,6 @@ /** * PostgresSecretStore tests — runs against whichever backend globalSetup - * selected. With `pnpm test` the backend is ephemeral PGlite; with + * selected. With `pnpm test` the backend is ephemeral embedded Postgres; with * `pnpm test:pg` (or when DATABASE_URL is set explicitly) it's real * Postgres. Tests are written once and verified under both paths. 
*/ diff --git a/packages/server/src/scheduled/__tests__/stale-run-reaper.test.ts b/packages/server/src/scheduled/__tests__/stale-run-reaper.test.ts index 417bd2df2..38d5bb2aa 100644 --- a/packages/server/src/scheduled/__tests__/stale-run-reaper.test.ts +++ b/packages/server/src/scheduled/__tests__/stale-run-reaper.test.ts @@ -1,6 +1,6 @@ /** * Integration test for the connector-lane stale-run reaper. Seeds three - * connector runs into PGlite and asserts the reaper only fails the one that + * connector runs into the test database and asserts the reaper only fails the one that * is in-progress with a stale `last_heartbeat_at`. Also exercises the * advisory-lock contention path: a second concurrent caller while the lock * is held no-ops instead of double-failing the row. @@ -9,7 +9,7 @@ import { afterAll, beforeAll, beforeEach, describe, expect, test } from 'bun:test'; import { getDb } from '../../db/client'; import { - ensurePgliteForGatewayTests, + ensureDbForGatewayTests, resetTestDatabase, } from '../../gateway/__tests__/helpers/db-setup'; import { reapStaleRuns } from '../check-stalled-executions'; @@ -18,7 +18,7 @@ const ORG_ID = 'reaper-org'; const STALE_THRESHOLD_SECONDS = 60; beforeAll(async () => { - await ensurePgliteForGatewayTests(); + await ensureDbForGatewayTests(); process.env.RUNS_REAPER_STALE_AFTER_SECONDS = String(STALE_THRESHOLD_SECONDS); }); @@ -159,12 +159,10 @@ describe('reapStaleRuns — connector lanes', () => { }); test('back-to-back calls do not double-fail the same row', async () => { - // The advisory-lock guards cross-pod contention. Under PGlite the - // single-connection pool serializes everything, so we can't simulate - // two pods literally racing the SELECT-then-UPDATE. What we CAN prove - // here is the function-level invariant the lock enforces: a row that's - // already been reaped doesn't get reaped a second time even if the - // sweeper fires again. + // The advisory-lock guards cross-pod contention. 
Rather than simulate two + // pods literally racing the SELECT-then-UPDATE, this proves the + // function-level invariant the lock enforces: a row that's already been + // reaped doesn't get reaped a second time even if the sweeper fires again. const staleId = await seedRun({ status: 'running', lastHeartbeatAgoSeconds: STALE_THRESHOLD_SECONDS * 3, diff --git a/packages/server/src/scheduled/jobs.ts b/packages/server/src/scheduled/jobs.ts index e6868f917..895693ca0 100644 --- a/packages/server/src/scheduled/jobs.ts +++ b/packages/server/src/scheduled/jobs.ts @@ -31,7 +31,7 @@ import { /** * Construct the TaskScheduler, register every periodic task, start dispatch, * and wire the lazy at-use-time refresh hooks into AuthProfilesManager. - * Single call site for both `server.ts` (prod) and `start-local.ts` (PGlite). + * Single call site for both `server.ts` (prod) and `start-local.ts` (embedded Postgres). */ export async function bootTaskScheduler( coreServices: CoreServices, diff --git a/packages/server/src/server-lifecycle.ts b/packages/server/src/server-lifecycle.ts index 5cb090d61..9fe67cb69 100644 --- a/packages/server/src/server-lifecycle.ts +++ b/packages/server/src/server-lifecycle.ts @@ -1,8 +1,9 @@ /** * Shared server lifecycle spine. * - * Both entry points — `server.ts` (Postgres) and `start-local.ts` (PGlite) — - * call into `createServerLifecycle()` so middleware ordering, route mounts, + * `server.ts` is the single entry for both backends (external Postgres and + * local embedded Postgres); it calls into `createServerLifecycle()` so + * middleware ordering, route mounts, * httpServer timeouts, shutdown sequence, and signal wiring stay identical * by construction. 
Drift between the two modes was the root cause of #948; * the only way to express a per-mode difference now is the four named hooks @@ -33,7 +34,7 @@ import { isSentryReported, markSentryReported } from "./sentry"; import logger from "./utils/logger"; import { initWorkspaceProvider } from "./workspace"; -export type ServerMode = "postgres" | "pglite"; +export type ServerMode = "postgres" | "embedded-postgres"; export interface ServerLifecycleConfig { mode: ServerMode; @@ -41,13 +42,14 @@ export interface ServerLifecycleConfig { host: string; port: number; /** - * Runs before workspace/gateway init. Postgres asserts the migrations - * ledger matches the bundled migrations dir; PGlite runs them. + * Runs before workspace/gateway init. External Postgres asserts the + * migrations ledger matches the bundled migrations dir; the embedded + * backend runs them. */ databaseReadiness: () => Promise; /** * Runs after gateway + scheduler boot, before `httpServer.listen()`. - * PGlite uses this for `ensureInstallOperator` + `ensureDefaultAgent`. + * The embedded backend uses this for `ensureInstallOperator` + `ensureDefaultAgent`. */ preListenHooks?: Array<() => Promise | void>; /** @@ -58,8 +60,8 @@ export interface ServerLifecycleConfig { postListenHooks?: Array<() => void>; /** * Runs during shutdown AFTER `stopLobuGateway` + `closeDbSingleton`, in - * declared order, before `httpServer.close()`. PGlite uses this for the - * embeddings child kill, socket-server stop, and PGlite db close. + * declared order, before `httpServer.close()`. The embedded backend uses + * this to kill the embeddings child and stop the embedded Postgres. 
*/ extraTeardown?: Array<() => Promise | void>; } @@ -69,19 +71,6 @@ export interface ServerLifecycleHandles { start: () => Promise; } -/** - * Apply the LOBU_DEV_PROJECT_PATH fallback so downstream - * `buildGatewayConfig()` can derive worker paths even when the server is - * invoked from a package subdir (`cd packages/server && bun run dev`) or - * via `lobu run` from a project subdir. Both entries call this before - * lifecycle construction. - */ -export function applyDevProjectPathDefault(packageRepoRoot: string): void { - if (!process.env.LOBU_DEV_PROJECT_PATH) { - process.env.LOBU_DEV_PROJECT_PATH = packageRepoRoot; - } -} - /** * Defensive error → plain-object serializer for the top-level boot catch. * @@ -327,7 +316,7 @@ export function createServerLifecycle( } = config; const start = async (): Promise => { - // 1. Database readiness — Postgres asserts schema; PGlite runs migrations. + // 1. Database readiness — external PG asserts schema; embedded runs migrations. await databaseReadiness(); // 2. Workspace provider — required before gateway boot. @@ -361,7 +350,7 @@ export function createServerLifecycle( httpServer.on("request", honoListener); } - // 8. Pre-listen hooks (PGlite: install-operator + default-agent). + // 8. Pre-listen hooks (embedded: install-operator + default-agent). for (const hook of preListenHooks) { await hook(); } @@ -410,7 +399,7 @@ export function createServerLifecycle( await safe("stopLobuGateway", () => stopLobuGateway()); // f. Close the postgres.js singleton pool. await safe("closeDbSingleton", () => closeDbSingleton()); - // g. Mode-specific teardown (PGlite kills embeddings child, stops + // g. Mode-specific teardown (embedded kills embeddings child, stops // socket server, closes the in-process db). 
for (let i = 0; i < extraTeardown.length; i++) { await safe(`extraTeardown[${i}]`, extraTeardown[i]); diff --git a/packages/server/src/server.ts b/packages/server/src/server.ts index 952b52632..7dee36581 100644 --- a/packages/server/src/server.ts +++ b/packages/server/src/server.ts @@ -1,108 +1,154 @@ /** - * Node.js Server Entry Point (Postgres mode). + * Lobu server entry point — single entry for both backends. * - * Mode-specific bootstrap only. The shared spine - * (Hono wrapper, middleware, route mounts, httpServer timeouts, Vite, - * scheduler boot, signal handlers, shutdown ordering) lives in - * `./server-lifecycle.ts`. DO NOT add `new Hono`, `app.use`, `app.route`, - * `http.createServer`, or `process.on('SIGTERM' | 'SIGINT', …)` here — they - * belong in the lifecycle. + * DATABASE_URL selects the mode: + * - postgres:// URL → connect to an external Postgres (prod, or a DB you run) + * - a path / file:// → spawn a local embedded Postgres rooted there + * + * Embedded boot lives in `./embedded-runtime` and is loaded ONLY via + * `await import(...)` in the embedded branch, so the external/prod path never + * resolves or loads the embedded-postgres binary. Everything after the backend + * is chosen is identical — the shared spine (Hono wrapper, middleware, routes, + * httpServer timeouts, Vite, scheduler, signal handlers, shutdown ordering) + * lives in `./server-lifecycle.ts`. DO NOT add `new Hono`, `app.use`, + * `app.route`, `http.createServer`, or signal handlers here. */ -// Refuse to boot under an unsupported Node major (isolated-vm gate). The -// module performs the check on load, so this side-effect import MUST be the -// first one — ESM evaluates sibling imports in textual order, so anything -// above this line would otherwise run first and could itself crash on the -// unsupported runtime. +// Refuse to boot under an unsupported Node major (isolated-vm gate). The module +// asserts on load, so this side-effect import MUST be first. 
import "./utils/assert-node-version";
 
-// Sentry must init before any other imports for auto-instrumentation
+// Sentry must init before any other imports for auto-instrumentation.
 import "./instrument";
 
 import dotenv from "dotenv";
 
 dotenv.config();
 
+// Mac-app / `lobu context server ...` settings from ~/.config/lobu/config.json.
+// After dotenv (so project .env wins) and before main() reads DATABASE_URL / PORT.
+import { applyUserServerConfigToEnv } from "./utils/user-config";
+
+applyUserServerConfigToEnv();
+
+import { randomBytes } from "node:crypto";
 import { createRequire } from "node:module";
 import path from "node:path";
 import { fileURLToPath } from "node:url";
 import { assertExternalDepsResolvable } from "@lobu/connector-worker/compile";
 import { getDb, probeListenNotify } from "./db/client";
-import {
-  applyDevProjectPathDefault,
-  createServerLifecycle,
-  reportBootFailure,
-} from "./server-lifecycle";
 import { getEnvFromProcess } from "./utils/env";
 import logger from "./utils/logger";
 import { assertSchemaUpToDate } from "./utils/schema-version-check";
 
-// Resolve repo root from this source file: …/packages/server/src/server.ts → repo root.
 const PACKAGE_REPO_ROOT = path.resolve(
   fileURLToPath(new URL(".", import.meta.url)),
   "../../..",
 );
 
-applyDevProjectPathDefault(PACKAGE_REPO_ROOT);
+/** DATABASE_URL is external iff it's a postgres:// URL; anything else → embedded. */
+function isExternal(databaseUrl: string | undefined): boolean {
+  return !!databaseUrl && /^postgres(ql)?:\/\//i.test(databaseUrl.trim());
+}
 
 async function main(): Promise<void> {
-  const databaseUrl = process.env.DATABASE_URL?.trim();
-  if (!databaseUrl) {
-    throw new Error(
-      "DATABASE_URL is required. Use a PostgreSQL connection string (for local dev run: pnpm dev:all).",
-    );
+  const raw = process.env.DATABASE_URL?.trim();
+  const external = isExternal(raw);
+
+  if (!process.env.LOBU_DEV_PROJECT_PATH) {
+    // Downstream buildGatewayConfig() derives worker paths from this; without
+    // it, running from a project subdir gets a wrong cwd-relative resolve.
+    process.env.LOBU_DEV_PROJECT_PATH = PACKAGE_REPO_ROOT;
   }
-  process.env.DATABASE_URL = databaseUrl;
 
-  const env = getEnvFromProcess();
   const port = parseInt(process.env.PORT || "8787", 10);
-  const host = process.env.HOST?.trim() || "0.0.0.0";
-
-  const databaseReadiness = async (): Promise<void> => {
-    // Refuse to boot if the image expects a migration the database hasn't
-    // applied. Skippable via SKIP_SCHEMA_VERSION_CHECK=1 for emergency
-    // forward-flight (e.g. rolling back to an older image whose migrations
-    // dir is a strict prefix of what's already applied). See
-    // utils/schema-version-check.ts for the 2026-05-16 incident this guards.
-    if (process.env.SKIP_SCHEMA_VERSION_CHECK !== "1") {
-      const migrationsDir =
-        process.env.LOBU_MIGRATIONS_DIR?.trim() ||
-        path.join(PACKAGE_REPO_ROOT, "db", "migrations");
-      await assertSchemaUpToDate(getDb(), { migrationsDir });
-    } else {
-      logger.warn(
-        "[schema-check] SKIP_SCHEMA_VERSION_CHECK=1 — skipping boot-time assertion",
-      );
-    }
+  // External: bind all interfaces by default (containers). Embedded: loopback
+  // only — the local-init endpoint mints PATs with no auth challenge, so it
+  // must not be reachable from the LAN unless the operator sets HOST.
+  const host =
+    process.env.HOST?.trim() || (external ? "0.0.0.0" : "127.0.0.1");
+
+  let databaseReadiness: () => Promise<void>;
+  let preListenHooks: Array<() => Promise<void> | void> = [];
+  let extraTeardown: Array<() => Promise<void> | void> = [];
 
-    // Verify LISTEN/NOTIFY actually delivers. This is a *detector*, not a
-    // gate: the runs-queue has a 200ms SKIP-LOCKED poll fallback that keeps
-    // the queue correct even when LISTEN is silently dropped (transaction-mode
-    // pgbouncer, RDS Proxy, etc.). Failing the probe just means wakeup
-    // latency degrades to the poll interval — not an outage.
-    if (process.env.SKIP_LISTEN_NOTIFY_PROBE !== "1") {
-      try {
-        await probeListenNotify();
-        logger.info("[DB] LISTEN/NOTIFY probe ok");
-      } catch (err) {
+  if (external) {
+    process.env.DATABASE_URL = raw;
+    databaseReadiness = async () => {
+      // Refuse to boot if the image expects a migration the DB hasn't applied.
+      // Skippable via SKIP_SCHEMA_VERSION_CHECK=1 for emergency forward-flight.
+      if (process.env.SKIP_SCHEMA_VERSION_CHECK !== "1") {
+        const migrationsDir =
+          process.env.LOBU_MIGRATIONS_DIR?.trim() ||
+          path.join(PACKAGE_REPO_ROOT, "db", "migrations");
+        await assertSchemaUpToDate(getDb(), { migrationsDir });
+      } else {
         logger.warn(
-          { err },
-          "[DB] LISTEN/NOTIFY probe failed — runs-queue will fall back to 200ms poll. Fix the pooler config to restore real-time wakeups.",
+          "[schema-check] SKIP_SCHEMA_VERSION_CHECK=1 — skipping boot-time assertion",
         );
       }
+      // Detector (not a gate): the runs-queue has a 200ms SKIP-LOCKED poll
+      // fallback, so a dropped LISTEN (transaction-mode pooler) just degrades
+      // wakeup latency rather than causing an outage.
+      if (process.env.SKIP_LISTEN_NOTIFY_PROBE !== "1") {
+        try {
+          await probeListenNotify();
+          logger.info("[DB] LISTEN/NOTIFY probe ok");
+        } catch (err) {
+          logger.warn(
+            { err },
+            "[DB] LISTEN/NOTIFY probe failed — runs-queue will fall back to 200ms poll.",
+          );
+        }
+      }
+    };
+  } else {
+    // Embedded/local conveniences — ephemeral secrets + localhost URLs so a
+    // bare `lobu run` works. (In prod these are always already set, so the
+    // guards no-op; this branch only runs for path/file:// DATABASE_URLs.)
+    if (!process.env.BETTER_AUTH_SECRET) {
+      process.env.BETTER_AUTH_SECRET = randomBytes(32).toString("base64");
+      logger.info(
+        "Generated ephemeral BETTER_AUTH_SECRET — set in .env to persist sessions",
+      );
+    }
+    if (!process.env.JWT_SECRET) {
+      process.env.JWT_SECRET = randomBytes(32).toString("base64");
+    }
+    if (!process.env.PUBLIC_WEB_URL) {
+      process.env.PUBLIC_WEB_URL = `http://localhost:${port}`;
     }
-  };
+    if (!process.env.NODE_ENV) {
+      process.env.NODE_ENV = "development";
+    }
+
+    // Lazy: pulls embedded-postgres + the pgvector injector ONLY here, spawns
+    // the cluster, sets process.env.DATABASE_URL to its TCP URL.
+    const { startEmbeddedRuntime } = await import("./embedded-runtime");
+    const rt = await startEmbeddedRuntime();
+    databaseReadiness = rt.databaseReadiness;
+    preListenHooks = rt.preListenHooks;
+    extraTeardown = rt.extraTeardown;
+    logger.info(`Data: ${rt.dataDir}`);
+  }
+
+  // Imported AFTER env + DATABASE_URL are finalised: the lifecycle's transitive
+  // imports (gateway, scheduler, ./index) evaluate at module load and expect a
+  // hot DATABASE_URL, which the embedded branch only sets above.
+  const { createServerLifecycle, reportBootFailure } = await import(
+    "./server-lifecycle"
+  );
 
   const lifecycle = createServerLifecycle({
-    mode: "postgres",
-    env,
+    mode: external ? "postgres" : "embedded-postgres",
+    env: getEnvFromProcess(),
     host,
     port,
     databaseReadiness,
-    // Crash loud if the runtime image is missing any connector external dep,
-    // instead of letting every feed silently fail with "Missing npm
-    // dependency: X" hours later. Run after listen() so the synchronous
-    // require.resolve walk doesn't add to cold-boot/readiness latency.
+    preListenHooks,
+    // Crash loud if the runtime image is missing a connector external dep,
+    // instead of letting feeds silently fail later. After listen() so the
+    // sync require.resolve walk doesn't add to readiness latency.
     postListenHooks: [
       () => {
         try {
@@ -113,9 +159,19 @@ async function main(): Promise<void> {
         }
       },
     ],
+    extraTeardown,
   });
 
-  await lifecycle.start();
+  try {
+    await lifecycle.start();
+  } catch (err) {
+    reportBootFailure(err);
+  }
 }
 
-main().catch(reportBootFailure);
+main().catch(async (error) => {
+  // Lazy import so a crash in the env-setup block above (before the lifecycle
+  // import) still reaches stderr with the structured fallback logging.
+  const { reportBootFailure } = await import("./server-lifecycle");
+  reportBootFailure(error);
+});
diff --git a/packages/server/src/start-local.ts b/packages/server/src/start-local.ts
deleted file mode 100644
index 0e446fb35..000000000
--- a/packages/server/src/start-local.ts
+++ /dev/null
@@ -1,461 +0,0 @@
-/**
- * Local Server Entry Point (PGlite mode).
- *
- * Mode-specific bootstrap only:
- *   - apply user-config / forced env-var writes BEFORE anything reads env
- *   - start PGlite + socket server + run migrations
- *   - fork embeddings child
- *   - hand off to `createServerLifecycle()` for the shared spine
- *
- * The shared spine (Hono wrapper, middleware, route mounts, httpServer
- * timeouts, Vite, scheduler boot, signal handlers, shutdown ordering) lives
- * in `./server-lifecycle.ts`. DO NOT add `new Hono`, `app.use`, `app.route`,
- * `http.createServer`, or `process.on('SIGTERM' | 'SIGINT', …)` here.
- */
-
-// Refuse to boot under an unsupported Node major (isolated-vm gate). Module
-// asserts on load, so this must be the first import; see assert-node-version.ts.
-import "./utils/assert-node-version";
-
-// Sentry must init before any other imports for auto-instrumentation
-// (postgres.js, http, etc.). No-op when SENTRY_DSN is unset, which is the
-// common case for `lobu run` installs — the import is cheap.
-import "./instrument"; - -import { fork } from "node:child_process"; -import { randomBytes } from "node:crypto"; -import { existsSync, mkdirSync } from "node:fs"; -import http from "node:http"; -import { createRequire } from "node:module"; -import { homedir } from "node:os"; -import { dirname, join } from "node:path"; -import { fileURLToPath } from "node:url"; - -import dotenv from "dotenv"; - -dotenv.config(); - -import { applyUserServerConfigToEnv } from "./utils/user-config"; - -// After dotenv (project .env) so .env wins; before the module-level DATA_DIR -// / PORT / HOST reads below so user-config overrides from -// ~/.config/lobu/config.json land in time. -// -// Managed context config only contributes PORT / HOST. External-Postgres -// routing happens upstream in `lobu run` via project `.env` or shell env. -applyUserServerConfigToEnv(); - -import { PGlite } from "@electric-sql/pglite"; -import { pg_trgm } from "@electric-sql/pglite/contrib/pg_trgm"; -import { vector } from "@electric-sql/pglite/vector"; -import { PGLiteSocketServer } from "@electric-sql/pglite-socket"; -import { assertExternalDepsResolvable } from "@lobu/connector-worker/compile"; -import { ensureDefaultAgent } from "./auth/default-provisioning"; -import { ensureInstallOperator } from "./auth/install-operator"; -import { - listMigrationFiles, - loadMigrationUpSection, -} from "./db/migration-loader"; -import { getEnvFromProcess } from "./utils/env"; -import logger from "./utils/logger"; - -const DATA_DIR = process.env.LOBU_DATA_DIR || join(homedir(), ".lobu", "data"); -const PORT = parseInt(process.env.PORT || "8787", 10); -// Loopback-only by default: the embedded local-runner ships a -// loopback-trust endpoint (`POST /api/local-init`) that mints worker-scoped -// PATs for the bootstrap user with no auth challenge. Binding to 0.0.0.0 -// would expose that to anyone on the LAN. Operators who explicitly want -// LAN/WAN reachability must set `HOST=0.0.0.0` themselves. 
-const HOST = process.env.HOST?.trim() || "127.0.0.1"; -const EMBEDDINGS_PORT = parseInt(process.env.EMBEDDINGS_PORT || "0", 10); -const APP_ROOT = join(fileURLToPath(new URL(".", import.meta.url)), ".."); -const PACKAGE_REPO_ROOT = join(APP_ROOT, "..", ".."); -const require = createRequire(import.meta.url); - -function resolveExistingPath( - ...candidates: Array -): string | null { - for (const candidate of candidates) { - if (candidate && existsSync(candidate)) { - return candidate; - } - } - return null; -} - -function readPositiveIntEnv(name: string, fallback: number): number { - const raw = process.env[name]?.trim(); - if (!raw) return fallback; - const parsed = Number.parseInt(raw, 10); - return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback; -} - -function isTruthyEnv(name: string): boolean { - return /^(1|true|yes|on)$/i.test(process.env[name]?.trim() ?? ""); -} - -async function main(): Promise { - mkdirSync(DATA_DIR, { recursive: true }); - - // Set all env vars FIRST — before any imports that might read them. The - // server-lifecycle module is imported dynamically below for exactly this - // reason: its transitive imports (`./index`, gateway, scheduler) read env - // at module-evaluation time, and pglite socket setup must finish first. 
- if (!process.env.BETTER_AUTH_SECRET) { - process.env.BETTER_AUTH_SECRET = randomBytes(32).toString("base64"); - logger.info( - "Generated ephemeral BETTER_AUTH_SECRET — set in .env to persist sessions", - ); - } - if (!process.env.JWT_SECRET) { - process.env.JWT_SECRET = randomBytes(32).toString("base64"); - } - if (!process.env.PUBLIC_WEB_URL) { - process.env.PUBLIC_WEB_URL = `http://localhost:${PORT}`; - } - if (!process.env.NODE_ENV) { - process.env.NODE_ENV = "development"; - } - process.env.PGSSLMODE = "disable"; - process.env.LOBU_DISABLE_PREPARE = "1"; - // Single-user mode default: the embedded runner spawns its own PGlite, - // seeds a single bootstrap user, and is expected to be used by exactly - // one operator on one machine. Block additional sign-ups so the - // operator can't accidentally fork into a second account (one for the - // Mac app + CLI, one for the web UI) by visiting /sign-up. Operators - // who actually want multi-user mode set LOBU_SINGLE_USER=0 explicitly. - if (process.env.LOBU_SINGLE_USER === undefined) { - process.env.LOBU_SINGLE_USER = "1"; - } - - if (!process.env.LOBU_DEV_PROJECT_PATH) { - // Mirror server.ts: downstream `buildGatewayConfig()` derives worker - // paths from LOBU_DEV_PROJECT_PATH. Without this fallback, users running - // `lobu run` from a project subdir get a wrong cwd-relative resolve. - process.env.LOBU_DEV_PROJECT_PATH = PACKAGE_REPO_ROOT; - } - - // ─── PGlite ────────────────────────────────────────────────── - - logger.info({ dataDir: DATA_DIR }, "Starting PGlite"); - const db = await PGlite.create({ - dataDir: DATA_DIR, - extensions: { vector, pg_trgm }, - }); - - // ─── PGlite Socket Server ──────────────────────────────────── - // Start socket FIRST, then run everything (including migrations) - // through it. No direct PGlite access after this point. 
- - const pgSocketPort = parseInt(process.env.PG_SOCKET_PORT || "0", 10); - const socketServer = new PGLiteSocketServer({ - db, - port: pgSocketPort, - maxConnections: readPositiveIntEnv( - "LOBU_PGLITE_SOCKET_MAX_CONNECTIONS", - 64, - ), - idleTimeout: readPositiveIntEnv("LOBU_PGLITE_SOCKET_IDLE_TIMEOUT_MS", 0), - debug: isTruthyEnv("LOBU_PGLITE_SOCKET_DEBUG"), - }); - socketServer.addEventListener("error", (event: Event) => { - logger.error( - { error: (event as CustomEvent).detail }, - "PGlite socket server error", - ); - }); - socketServer.addEventListener("close", () => { - logger.warn("PGlite socket server closed"); - }); - // Wait for listening event to get the actual port (especially when port=0) - const actualPgPort = await new Promise((resolve) => { - socketServer.addEventListener("listening", (e: Event) => { - resolve((e as CustomEvent).detail?.port ?? pgSocketPort); - }); - socketServer.start(); - }); - // sslmode=disable is required — PGlite socket doesn't support SSL negotiation - const dbUrl = `postgresql://postgres@127.0.0.1:${actualPgPort}/postgres?sslmode=disable`; - process.env.DATABASE_URL = dbUrl; - logger.info({ port: actualPgPort }, "PGlite socket server ready"); - - // ─── Embeddings Service (child process) ────────────────────── - - const embeddingsChild = await startEmbeddings(); - - // ─── Lifecycle ─────────────────────────────────────────────── - // Dynamic import: env mutation above must land before the lifecycle's - // transitive imports (gateway, scheduler, ./index) evaluate at module load. - // This collapses the previous fan-out of seven `await import(...)` sites - // (one per helper) into a single boundary. - const { createServerLifecycle, reportBootFailure } = await import( - "./server-lifecycle" - ); - - const env = getEnvFromProcess(); - - // Personal-org id for default-agent provisioning. Resolved once during the - // pre-listen phase rather than per-call, so the dynamic postgres import - // happens with a hot DATABASE_URL. 
- let personalOrgId: string | null = null; - - const lifecycle = createServerLifecycle({ - mode: "pglite", - env, - host: HOST, - port: PORT, - databaseReadiness: () => runMigrations(dbUrl), - preListenHooks: [ - // Runs BEFORE listen so headless installs (CI, containers, /tmp - // scaffolds without a browser) can sign in via better-auth without - // a chicken-and-egg /sign-up step. Provisions a synthetic - // `install_operator` user whose password is the install's - // ENCRYPTION_KEY. Idempotent — re-running on a boot where the - // operator already exists is a no-op. See - // `docs/install-operator-bootstrap.md`. - async () => { - try { - await ensureInstallOperator(); - } catch (err) { - logger.error({ err }, "Install-operator provisioning failed"); - // Don't crash the server — the operator only matters for headless - // installs; a browser-based signup still works. - } - }, - // Default-agent provisioning. Deferred to first-user creation in the - // `databaseHooks.user.create.after` hook; this resolves the personal - // org id on each boot so a returning user picks up the default agent. - async () => { - try { - const personalOrgRows = (await import("postgres")).default(dbUrl, { - max: 1, - }); - try { - const rows = (await personalOrgRows` - SELECT id FROM "organization" - WHERE (metadata::jsonb)->>'personal_org_for_user_id' IS NOT NULL - ORDER BY "createdAt" ASC LIMIT 1 - `) as unknown as Array<{ id: string }>; - personalOrgId = rows[0]?.id ?? null; - if (personalOrgId) await ensureDefaultAgent(personalOrgId); - } finally { - await personalOrgRows.end({ timeout: 1 }); - } - } catch (err) { - logger.warn({ err }, "Default-agent provisioning failed"); - } - }, - ], - // Mirror server.ts: crash loud if the runtime image is missing any - // connector external dep, instead of letting each feed silently fail - // with "Missing npm dependency: X" hours later. Runs after listen() so - // the sync require.resolve walk doesn't add to cold-boot latency. 
- // Without this hook, PGlite mode silently re-introduces the drift the - // refactor exists to prevent — flagged by pi review on #951. - postListenHooks: [ - () => { - try { - assertExternalDepsResolvable(require.resolve); - } catch (err) { - logger.error({ err }, "Connector external dependency check failed"); - process.exit(1); - } - }, - ], - // PGlite-specific teardown — runs after stopLobuGateway + closeDbSingleton - // so gateway's postgres.js connections release before the socket goes - // away underneath them. - extraTeardown: [ - () => { - embeddingsChild?.kill(); - }, - () => socketServer.stop(), - () => db.close(), - ], - }); - - try { - await lifecycle.start(); - logger.info(`Data: ${DATA_DIR}`); - } catch (err) { - // Bridge to reportBootFailure so PGlite-mode boot crashes get the same - // structured + plain-text fallback logging as Postgres mode. - reportBootFailure(err); - } -} - -// ─── Migrations ────────────────────────────────────────────────── - -async function runMigrations(dbUrl: string): Promise { - // Embedded boot runs the same migrations dbmate uses for prod, applied - // unconditionally. After the schema squash (2026-05-19), the migrations - // dir is a single baseline + any forward deltas; both are idempotent - // enough to replay on a pre-initialized DB: - // - The baseline starts with `CREATE TABLE` against a fresh schema - // and is gated by a `schema_migrations` row insertion. On a DB that - // has the baseline applied, dbmate-style version tracking skips the - // file; we do the same below. - // - Forward deltas use `IF NOT EXISTS` discipline so re-application - // against an already-migrated DB is a no-op. - const pg = await import("postgres"); - const sql = pg.default(dbUrl, { max: 1 }); - - try { - const migrationsDir = resolveExistingPath( - // Published @lobu/cli copies migrations next to start-local.bundle.mjs - // under dist/db/migrations. 
- join(fileURLToPath(new URL(".", import.meta.url)), "db", "migrations"), - join(APP_ROOT, "db", "migrations"), - // Monorepo `bun run --filter @lobu/server dev:local`: APP_ROOT is - // packages/server/, so the migrations live two levels up at repo root. - join(APP_ROOT, "..", "..", "db", "migrations"), - join(process.cwd(), "db", "migrations"), - join(process.cwd(), "..", "..", "db", "migrations"), - ); - if (!migrationsDir) { - throw new Error("Migrations directory not found."); - } - - // Make sure the `schema_migrations` ledger exists before we read it. - await sql.unsafe(` - CREATE TABLE IF NOT EXISTS public.schema_migrations ( - version character varying(128) NOT NULL PRIMARY KEY - ) - `); - - const appliedRows = (await sql.unsafe( - `SELECT version FROM public.schema_migrations`, - )) as Array<{ version: string }>; - const applied = new Set(appliedRows.map((r) => r.version)); - - // Versions whose contents are known to be fully covered by an existing - // schema (i.e. the squashed baseline). When one of these errors with a - // duplicate-object SQLSTATE the DB is already at the target state and we - // can safely record the version as applied. This is intentionally narrow: - // any future delta migration must use `IF NOT EXISTS` discipline rather - // than relying on this fallback, or its mid-file failures could mask - // schema drift. - const IDEMPOTENT_BASELINE_VERSIONS = new Set(["00000000000000"]); - - logger.info("Running migrations..."); - for (const file of listMigrationFiles(migrationsDir)) { - // Filename convention is `_.sql`; the version is the - // leading underscore-separated prefix. - const version = file.split("_")[0] ?? 
""; - if (applied.has(version)) { - continue; - } - const migrationSql = loadMigrationUpSection(migrationsDir, file); - if (!migrationSql) continue; - - await sql.unsafe("SET search_path TO public"); - try { - await sql.unsafe(migrationSql); - } catch (err) { - // The squashed baseline uses plain `CREATE FUNCTION` / `CREATE TABLE` - // for cleanliness, so replaying it against a DB that already has the - // schema raises `42723` (duplicate function) / `42P07` (duplicate - // table) / `42710` (duplicate object). When the failing file is the - // baseline, that's exactly the no-op case `lobu run` should treat as - // success. For any other migration the duplicate error is surfaced - // unchanged so partial failures cannot silently advance the ledger - // (see `IDEMPOTENT_BASELINE_VERSIONS` above). - const code = (err as { code?: string } | null)?.code; - const isDuplicateObject = - code === "42723" || code === "42P07" || code === "42710"; - if (!isDuplicateObject || !IDEMPOTENT_BASELINE_VERSIONS.has(version)) { - throw err; - } - logger.info( - { migration: file, version, pgErrorCode: code }, - "Migration already applied (idempotent skip)", - ); - } - await sql` - INSERT INTO public.schema_migrations (version) VALUES (${version}) - ON CONFLICT DO NOTHING - `; - } - - logger.info("Migrations complete"); - } finally { - await sql.end(); - } -} - -// ─── Embeddings (child process) ────────────────────────────────── - -function findFreePort(): Promise { - return new Promise((resolve, reject) => { - const srv = http.createServer(); - srv.listen(0, "127.0.0.1", () => { - const addr = srv.address(); - const port = typeof addr === "object" && addr ? 
addr.port : 0; - srv.close(() => resolve(port)); - }); - srv.on("error", reject); - }); -} - -async function startEmbeddings(): Promise | null> { - const publishedServerPath = (() => { - try { - return fileURLToPath(import.meta.resolve("@lobu/embeddings/server")); - } catch { - return null; - } - })(); - const serverPath = resolveExistingPath( - join(APP_ROOT, "packages", "embeddings", "src", "server.ts"), - join(process.cwd(), "packages", "embeddings", "src", "server.ts"), - ...(publishedServerPath ? [publishedServerPath] : []), - ); - if (!serverPath) { - logger.warn( - "Embeddings service not found — embedding generation will not be available", - ); - return null; - } - - const port = EMBEDDINGS_PORT || (await findFreePort()); - const isTypescriptServer = serverPath.endsWith(".ts"); - let execArgv: string[] = []; - if (isTypescriptServer) { - const tsxPackageJson = require.resolve("tsx/package.json"); - const tsxLoaderPath = join(dirname(tsxPackageJson), "dist", "loader.mjs"); - execArgv = ["--import", tsxLoaderPath]; - } - - const child = fork(serverPath, [], { - execArgv, - env: { ...process.env, PORT: String(port) }, - stdio: ["ignore", "pipe", "pipe", "ipc"], - }); - - process.env.EMBEDDINGS_SERVICE_URL = `http://127.0.0.1:${port}`; - - child.stdout?.on("data", (data: Buffer) => { - const msg = data.toString().trim(); - if (msg) logger.info({ service: "embeddings" }, msg); - }); - - child.stderr?.on("data", (data: Buffer) => { - const msg = data.toString().trim(); - if (msg) logger.warn({ service: "embeddings" }, msg); - }); - - child.on("exit", (code) => { - if (code !== 0 && code !== null) { - logger.warn({ code }, "Embeddings service exited"); - } - }); - - return child; -} - -main().catch(async (error) => { - // Imported lazily so a crash in the env-setup block above (which runs - // before the lifecycle import) still reaches stderr. 
- const { reportBootFailure } = await import("./server-lifecycle"); - reportBootFailure(error); -}); diff --git a/packages/server/src/tools/admin/manage_watchers.ts b/packages/server/src/tools/admin/manage_watchers.ts index 332312e19..ba518ee86 100644 --- a/packages/server/src/tools/admin/manage_watchers.ts +++ b/packages/server/src/tools/admin/manage_watchers.ts @@ -2297,8 +2297,9 @@ async function handleList( i.scheduler_client_id, i.model_config, i.sources, - -- text[] is returned as the Postgres array literal "{a,b}" by PGlite's - -- TCP socket; wrap in to_jsonb so clients get a real JSON array. + -- With fetch_types:false (see db/client.ts) postgres.js does not parse + -- arrays, so text[] arrives as the literal "{a,b}"; wrap in to_jsonb so + -- clients get a real JSON array. to_jsonb(i.tags) AS tags, i.notification_channel, i.notification_priority, diff --git a/packages/server/src/tools/get_watchers.ts b/packages/server/src/tools/get_watchers.ts index aa32982f2..7fdc53cca 100644 --- a/packages/server/src/tools/get_watchers.ts +++ b/packages/server/src/tools/get_watchers.ts @@ -531,9 +531,9 @@ export async function getWatcher( // edit refactor version chains live on the group root, not on each // non-root assignment. // - // Tagged-template + sql.unsafe() inside the template breaks PGlite's - // simple-query mode (prepare=false), so we keep this path as a single - // unsafe call instead. + // Built as a single sql.unsafe() statement — composing sql.unsafe() + // fragments inside a tagged template alongside $N params is fragile, so the + // whole query is one unsafe call. const requestedVersion = args.template_version ?? null; const requestedVersionId = args.template_version_id ?? 
null; const namespacesLiteral = STANDARD_IDENTITY_NAMESPACES.map((n) => `'${n}'`).join(','); diff --git a/packages/server/src/tools/resolve_path.ts b/packages/server/src/tools/resolve_path.ts index 7d11b6835..cb8eea468 100644 --- a/packages/server/src/tools/resolve_path.ts +++ b/packages/server/src/tools/resolve_path.ts @@ -9,7 +9,7 @@ import * as Sentry from '@sentry/node'; import { type Static, Type } from '@sinclair/typebox'; -import { createDbClientFromEnv, getDb, simpleQuery } from '../db/client'; +import { createDbClientFromEnv, getDb } from '../db/client'; import type { Env } from '../index'; import { entityLinkMatchSql } from '../utils/content-search'; import { @@ -361,7 +361,7 @@ async function _resolvePath( // Cross-org tolerance: a tenant path can traverse into a public-catalog entity. // $member is per-tenant — never fall back to a public catalog's $member row, since // member-redaction uses the caller's workspace role, not the resolved entity's org. - const row = await simpleQuery(sql` + const row = await sql` SELECT e.id, et.slug AS entity_type, e.slug, e.name, e.parent_id FROM entities e JOIN entity_types et ON et.id = e.entity_type_id @@ -379,7 +379,7 @@ async function _resolvePath( ) ORDER BY (e.organization_id = ${workspace.id}) DESC, e.id ASC LIMIT 1 - `); + `; if (row.length === 0) { throw new ToolUserError( @@ -401,7 +401,7 @@ async function _resolvePath( // Leaf entity: fetch core data (without expensive COUNT subqueries). // Cross-org tolerance: same widening as the intermediate query, excluding $member. 
- const row = await simpleQuery(sql` + const row = await sql` SELECT e.id, et.slug AS entity_type, @@ -432,7 +432,7 @@ async function _resolvePath( ) ORDER BY (e.organization_id = ${workspace.id}) DESC, e.id ASC LIMIT 1 - `); + `; if (row.length === 0) { throw new ToolUserError( @@ -471,15 +471,13 @@ async function _resolvePath( { cleanTemplate: entityCleanTpl, templateData: entityTemplateData }, ] = await Sentry.startSpan({ name: 'entity:counts+tabs', op: 'db' }, () => Promise.all([ - simpleQuery( - sql.unsafe<{ cnt: number }>( - `SELECT COUNT(*) as cnt FROM current_event_records ev + sql.unsafe<{ cnt: number }>( + `SELECT COUNT(*) as cnt FROM current_event_records ev WHERE ${entityLinkMatchSql(`${Number(entityRow.id)}::bigint`, 'ev')} AND ev.organization_id = $1`, - [workspace.id] - ) + [workspace.id] ), - simpleQuery(sql` + sql` SELECT COUNT(DISTINCT cn.connector_key) as cnt FROM feeds f JOIN connections cn ON cn.id = f.connection_id @@ -487,13 +485,11 @@ async function _resolvePath( AND f.organization_id = ${workspace.id} AND f.deleted_at IS NULL AND cn.deleted_at IS NULL - `), - simpleQuery( - sql`SELECT COUNT(*) as cnt FROM watchers i + `, + sql`SELECT COUNT(*) as cnt FROM watchers i WHERE ${Number(entityRow.id)}::int = ANY(i.entity_ids) AND i.organization_id = ${workspace.id} - AND i.status = 'active'` - ), + AND i.status = 'active'`, fetchTabs(sql, 'entity', String(entityRow.id), workspace.id), fetchTabs(sql, 'entity_type', entityRow.entity_type, workspace.id), processTemplateDataSources(entityRow.json_template, entityDataCtx, sql), @@ -512,11 +508,11 @@ async function _resolvePath( let safeEntityMetadata = rawEntityMetadata; const canSeeEmail = ctx.memberRole === 'owner' || ctx.memberRole === 'admin'; if (!canSeeEmail) { - const schemaRow = await simpleQuery(sql` + const schemaRow = await sql` SELECT metadata_schema FROM entity_types WHERE slug = '$member' AND organization_id = ${workspace.id} AND deleted_at IS NULL LIMIT 1 - `); + `; const memberSchema = 
(schemaRow[0]?.metadata_schema as Record | null) ?? null; const { emailField } = resolveMemberSchemaFieldsFromSchema(memberSchema); if (entityRow.entity_type === '$member' && emailField in safeEntityMetadata) { @@ -558,7 +554,7 @@ async function _resolvePath( // Fetch children + siblings without per-row COUNT subqueries. // content_count is omitted to avoid expensive GIN index scans over the events table. const [childRows, siblingRows] = await Promise.all([ - simpleQuery(sql` + sql` SELECT e.id, et.slug AS entity_type, e.slug, e.name, e.metadata::jsonb->>'market' as market FROM entities e @@ -566,8 +562,8 @@ async function _resolvePath( WHERE e.organization_id = ${workspace.id} AND e.parent_id = ${resolvedEntity.id} ORDER BY e.name ASC - `), - simpleQuery(sql` + `, + sql` SELECT e.id, et.slug AS entity_type, e.slug, e.name FROM entities e JOIN entity_types et ON et.id = e.entity_type_id @@ -578,7 +574,7 @@ async function _resolvePath( OR e.parent_id = ${resolvedEntity.parent_id} ) ORDER BY e.name ASC - `), + `, ]); children = childRows.map((row) => ({ @@ -685,7 +681,7 @@ async function listEntityTypes( sql: DbClient, organizationId: string ): Promise { - const rows = await simpleQuery(sql` + const rows = await sql` SELECT et.id, et.slug, @@ -701,7 +697,7 @@ async function listEntityTypes( AND et.organization_id = ${organizationId} GROUP BY et.id, et.slug, et.name, et.description, et.icon, et.color ORDER BY et.name ASC - `); + `; return rows.map((row) => ({ id: Number(row.id), @@ -723,7 +719,7 @@ async function fetchScopeSummary( // Agents are org-scoped; devices are owned by the requesting user. Both are // sidebar-nav badges that don't narrow with the focused entity, so fetch // them regardless of `entity`. 
- const [navRow] = await simpleQuery(sql` + const [navRow] = await sql` SELECT ( SELECT COUNT(*)::int @@ -735,7 +731,7 @@ async function fetchScopeSummary( FROM device_workers dw WHERE dw.user_id = ${userId} ) AS devices_count - `); + `; const agentsCount = Number((navRow as { agents_count?: number } | undefined)?.agents_count) || 0; const devicesCount = Number((navRow as { devices_count?: number } | undefined)?.devices_count) || 0; @@ -750,7 +746,7 @@ async function fetchScopeSummary( }; } - const [row] = await simpleQuery(sql` + const [row] = await sql` SELECT ( SELECT COUNT(*)::int @@ -769,7 +765,7 @@ async function fetchScopeSummary( WHERE w.organization_id = ${organizationId} AND w.status = 'active' ) AS watchers_count - `); + `; return { total_content: Number((row as { total_content?: number } | undefined)?.total_content) || 0, @@ -786,16 +782,15 @@ async function fetchRecentContent( organizationId: string, entityId: number | null ): Promise { - // Inline the entity-link match as raw SQL — postgres.js can't combine - // sql.unsafe() inside a tagged template that also has $N values when - // running against PGlite (prepare:false simple-query mode). + // Inline the entity-link match as raw SQL — this whole query is built as a + // single sql.unsafe() statement rather than mixing sql.unsafe() fragments + // inside a tagged template that also carries $N values. const entityFilter = entityId !== null ? 
`AND ${entityLinkMatchSql(`${Number(entityId)}::bigint`, 'ev')}` : ''; - const rows = await simpleQuery( - sql.unsafe>( - ` + const rows = await sql.unsafe>( + ` SELECT ev.id, ev.entity_ids, @@ -823,8 +818,7 @@ async function fetchRecentContent( ORDER BY COALESCE(ev.occurred_at, ev.created_at) DESC LIMIT $2 `, - [organizationId, BOOTSTRAP_RECENT_LIMIT] - ) + [organizationId, BOOTSTRAP_RECENT_LIMIT] ); return (rows as Array>).map((row) => ({ @@ -851,7 +845,7 @@ async function fetchRecentFeeds( organizationId: string, entityId: number | null ): Promise { - const rows = await simpleQuery(sql` + const rows = await sql` WITH scoped_feeds AS ( SELECT f.id, @@ -904,7 +898,7 @@ async function fetchRecentFeeds( FROM scoped_feeds sf LEFT JOIN event_counts ec ON ec.feed_id = sf.id ORDER BY COALESCE(sf.updated_at, sf.created_at) DESC - `); + `; return (rows as Array>).map((row) => ({ id: Number(row.id), @@ -929,7 +923,7 @@ async function fetchRecentWatchers( organizationId: string, entityId: number | null ): Promise { - const rows = await simpleQuery(sql` + const rows = await sql` WITH scoped_watchers AS ( SELECT w.id, @@ -979,7 +973,7 @@ async function fetchRecentWatchers( LEFT JOIN entity_types pet ON pet.id = parent.entity_type_id LEFT JOIN watcher_window_counts wwc ON wwc.watcher_id = sw.id ORDER BY COALESCE(sw.updated_at, sw.created_at) DESC - `); + `; return (rows as Array>).map((row) => ({ watcher_id: String(row.watcher_id), @@ -1029,7 +1023,7 @@ async function listConnectorDefinitions( sql: DbClient, organizationId: string ): Promise { - const rows = await simpleQuery(sql` + const rows = await sql` SELECT d.key, d.name, @@ -1041,7 +1035,7 @@ async function listConnectorDefinitions( WHERE d.status = 'active' AND d.organization_id = ${organizationId} ORDER BY d.name ASC - `); + `; return rows.map((row) => ({ key: String(row.key), @@ -1060,7 +1054,7 @@ async function fetchTabs( resourceId: string, organizationId: string ): Promise { - const rows = await simpleQuery(sql` + 
const rows = await sql` SELECT vtat.tab_name, vtat.tab_order, @@ -1073,7 +1067,7 @@ async function fetchTabs( AND vtat.resource_id = ${resourceId} AND vtat.organization_id = ${organizationId} ORDER BY vtat.tab_order ASC, vtat.tab_name ASC - `); + `; return rows.map((row) => ({ tab_name: String(row.tab_name), diff --git a/packages/server/src/utils/assert-node-version.ts b/packages/server/src/utils/assert-node-version.ts index 5986a23b9..d95a488d3 100644 --- a/packages/server/src/utils/assert-node-version.ts +++ b/packages/server/src/utils/assert-node-version.ts @@ -39,5 +39,5 @@ function assertSupportedNodeVersion(): void { // Run on module load so a single side-effect import at the top of an entry // file fires the check BEFORE any other static import executes. ESM evaluates // sibling imports in textual order; placing this module's import first -// guarantees the assertion runs before instrument.ts, dotenv, pglite, etc. +// guarantees the assertion runs before instrument.ts, dotenv, embedded-postgres, etc. assertSupportedNodeVersion(); diff --git a/packages/server/src/utils/insert-event.ts b/packages/server/src/utils/insert-event.ts index d7950b1f1..77dce5138 100644 --- a/packages/server/src/utils/insert-event.ts +++ b/packages/server/src/utils/insert-event.ts @@ -320,10 +320,9 @@ export async function insertEvent( if (!inserted) { // INSERT ... RETURNING should always yield a row for a successful insert. // An empty result means either (a) a BEFORE INSERT trigger silently - // RETURNed NULL (none in our schema today), (b) a PGlite quirk around - // GENERATED STORED columns failing without surfacing an error, or - // (c) postgres.js dropping the rows for an obscure reason. None of these - // should drop events on the floor — convert the cryptic `Cannot read + // RETURNed NULL (none in our schema today), or (b) postgres.js dropping + // the rows for an obscure reason. 
Neither should drop events on the + // floor — convert the cryptic `Cannot read // properties of undefined (reading 'id')` into a real error with // diagnostic context so we can root-cause when it next happens. logger.error( diff --git a/packages/server/src/workspace/multi-tenant.ts b/packages/server/src/workspace/multi-tenant.ts index fa79ab7dd..6d4a07d25 100644 --- a/packages/server/src/workspace/multi-tenant.ts +++ b/packages/server/src/workspace/multi-tenant.ts @@ -5,7 +5,7 @@ import { OAuthProvider } from '../auth/oauth/provider'; import type { AuthInfo } from '../auth/oauth/types'; import { PersonalAccessTokenService } from '../auth/tokens'; import { isPublicReadable } from '../auth/tool-access'; -import { getDb, simpleQuery } from '../db/client'; +import { getDb } from '../db/client'; import type { Env } from '../index'; import logger from '../utils/logger'; import { getConfiguredPublicOrigin } from '../utils/public-origin'; @@ -70,13 +70,11 @@ export async function getCachedMembershipRole( const key = `${organizationId}:${userId}`; const cached = memberRoleCache.get(key); if (cached !== undefined) return cached; - const rows = await simpleQuery( - getDb()` + const rows = await getDb()` SELECT role FROM "member" WHERE "organizationId" = ${organizationId} AND "userId" = ${userId} LIMIT 1 - ` - ); + `; const role = rows.length > 0 ? 
(rows[0].role as string) : null; memberRoleCache.set(key, role); return role; @@ -90,11 +88,9 @@ export async function getCachedOrgBySlug( ): Promise<{ id: string; visibility: string } | null> { const cached = orgSlugCache.get(slug); if (cached) return cached; - const rows = await simpleQuery( - getDb()` + const rows = await getDb()` SELECT id, visibility FROM "organization" WHERE slug = ${slug} LIMIT 1 - ` - ); + `; if (rows.length === 0) return null; const record = { id: rows[0].id as string, @@ -113,11 +109,9 @@ export async function getCachedOrgBySlug( export async function getOrgById( organizationId: string ): Promise<{ slug: string; visibility: string } | null> { - const rows = await simpleQuery( - getDb()` + const rows = await getDb()` SELECT slug, visibility FROM "organization" WHERE id = ${organizationId} LIMIT 1 - ` - ); + `; if (rows.length === 0) return null; return { slug: rows[0].slug as string, @@ -167,11 +161,11 @@ export class MultiTenantProvider implements WorkspaceProvider { requestedOrgId = cached.id; requestedOrgVisibility = cached.visibility; } else { - const orgResult = await simpleQuery(sql` + const orgResult = await sql` SELECT id, visibility FROM "organization" WHERE slug = ${requestedOrgSlug} LIMIT 1 - `); + `; if (orgResult.length === 0) { return c.json( { @@ -226,11 +220,11 @@ export class MultiTenantProvider implements WorkspaceProvider { if (cached !== undefined) return cached; } - const result = await simpleQuery(sql` + const result = await sql` SELECT role FROM "member" WHERE "organizationId" = ${orgId} AND "userId" = ${userId} LIMIT 1 - `); + `; const role = result.length > 0 ? 
(result[0].role as string) : null; memberRoleCache.set(cacheKey, role); return role; @@ -293,13 +287,13 @@ export class MultiTenantProvider implements WorkspaceProvider { } ); } - const agentRows = await simpleQuery(sql` + const agentRows = await sql` SELECT owner_user_id FROM agents WHERE id = ${tokenData.agentId} AND organization_id = ${requestedOrgId} LIMIT 1 - `); + `; if (agentRows.length === 0) { return c.json( { error: 'insufficient_scope', error_description: 'Worker token is not valid for this organization' }, @@ -307,13 +301,13 @@ export class MultiTenantProvider implements WorkspaceProvider { ); } const directAuthUserId = (agentRows[0]?.owner_user_id as string | undefined) ?? tokenData.userId; - const roleRows = await simpleQuery(sql` + const roleRows = await sql` SELECT role FROM "member" WHERE "organizationId" = ${requestedOrgId} AND "userId" = ${directAuthUserId} LIMIT 1 - `); + `; const directAuthRole = roleRows[0]?.role as string | undefined; if (!directAuthRole || !['owner', 'admin'].includes(directAuthRole)) { return c.json( @@ -444,12 +438,12 @@ export class MultiTenantProvider implements WorkspaceProvider { let bearerUser: { id: string; email: string; name: string; emailVerified: boolean } | null = null; try { - const userRows = await simpleQuery(sql` + const userRows = await sql` SELECT id, email, name, "emailVerified" FROM "user" WHERE id = ${authInfo.userId} LIMIT 1 - `); + `; if (userRows.length > 0) { const row = userRows[0] as { id: string; @@ -618,30 +612,26 @@ export class MultiTenantProvider implements WorkspaceProvider { const params: string[] = []; const searchClause = search ? 
`AND o.name ILIKE $${params.push(`%${search}%`)}` : ''; - return simpleQuery( - sql.unsafe( - `SELECT o.id, o.name, o.slug, o.logo, o.description, o."createdAt" as created_at, false as is_member, o.visibility + return sql.unsafe( + `SELECT o.id, o.name, o.slug, o.logo, o.description, o."createdAt" as created_at, false as is_member, o.visibility FROM "organization" o WHERE o.visibility = 'public' ${searchClause} ORDER BY o.name ASC`, - params - ) + params ); } const params: string[] = [userId]; const searchClause = search ? `AND o.name ILIKE $${params.push(`%${search}%`)}` : ''; - return simpleQuery( - sql.unsafe( - `SELECT o.id, o.name, o.slug, o.logo, o.description, o."createdAt" as created_at, + return sql.unsafe( + `SELECT o.id, o.name, o.slug, o.logo, o.description, o."createdAt" as created_at, (m."userId" IS NOT NULL) as is_member, o.visibility FROM "organization" o LEFT JOIN "member" m ON o.id = m."organizationId" AND m."userId" = $1 WHERE (m."userId" IS NOT NULL OR o.visibility = 'public') ${searchClause} ORDER BY o.name ASC`, - params - ) + params ); } @@ -651,9 +641,9 @@ export class MultiTenantProvider implements WorkspaceProvider { async getOrgSlug(orgId: string): Promise { const sql = getDb(); - const rows = await simpleQuery(sql` + const rows = await sql` SELECT slug FROM "organization" WHERE id = ${orgId} LIMIT 1 - `); + `; return rows[0]?.slug ?? 
null; } @@ -661,11 +651,9 @@ export class MultiTenantProvider implements WorkspaceProvider { if (orgIds.length === 0) return new Map(); const sql = getDb(); const placeholders = orgIds.map((_, i) => `$${i + 1}`).join(', '); - const rows = await simpleQuery( - sql.unsafe<{ id: string; slug: string }>( - `SELECT id, slug FROM "organization" WHERE id IN (${placeholders})`, - orgIds - ) + const rows = await sql.unsafe<{ id: string; slug: string }>( + `SELECT id, slug FROM "organization" WHERE id IN (${placeholders})`, + orgIds ); return new Map(rows.map((row) => [row.id, row.slug])); } @@ -676,7 +664,7 @@ export class MultiTenantProvider implements WorkspaceProvider { if (cached !== undefined) return cached; const sql = getDb(); - const rows = await simpleQuery(sql` + const rows = await sql` SELECT n.slug, n.type, @@ -688,21 +676,21 @@ export class MultiTenantProvider implements WorkspaceProvider { LEFT JOIN organization o ON n.type = 'organization' AND n.ref_id = o.id WHERE n.slug = ${slug} AND n.type = ${type} - `); + `; if (rows.length === 0) { // Fallback: namespace entry may be missing, query organization table directly if (type === 'organization') { - const orgRows = await simpleQuery(sql` + const orgRows = await sql` SELECT id, name, slug FROM organization WHERE slug = ${slug} LIMIT 1 - `); + `; if (orgRows.length > 0) { const org = orgRows[0] as { id: string; name: string; slug: string }; // Self-heal: backfill the missing namespace entry - await simpleQuery(sql` + await sql` INSERT INTO namespace (slug, type, ref_id) VALUES (${slug}, 'organization', ${org.id}) ON CONFLICT (slug) DO NOTHING - `); + `; const result: ResolvedOwner = { slug: org.slug, type: 'organization', diff --git a/release-please-config.json b/release-please-config.json index 10a5c05d4..ef39a35e3 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -56,6 +56,11 @@ "path": "packages/embeddings/package.json", "jsonpath": "$.version" }, + { + "type": "json", + "path": 
"packages/pgvector-embedded/package.json", + "jsonpath": "$.version" + }, { "type": "json", "path": "packages/promptfoo-provider/package.json", diff --git a/scripts/dev-native.sh b/scripts/dev-native.sh index eadd9774a..afc940470 100755 --- a/scripts/dev-native.sh +++ b/scripts/dev-native.sh @@ -130,15 +130,17 @@ mkdir -p "$LOBU_WORKSPACE_ROOT" # --- Run ------------------------------------------------------------------- if [ -z "${DATABASE_URL:-}" ]; then - # No external Postgres → boot the embedded PGlite backend (src/start-local.ts). - # First run mints a web login (dev@lobu.local / lobudev123, org "dev") and a - # bootstrap PAT under LOBU_DATA_DIR. Vite HMR still runs in-process. - export LOBU_DATA_DIR="${LOBU_DATA_DIR:-$REPO_ROOT/.lobu-dev}" + # No external Postgres → boot the embedded Postgres backend (src/server.ts). + # DATABASE_URL=file:// is the single backend selector; the cluster lives + # at /.lobu/pgdata. First run mints a web login (dev@lobu.local / + # lobudev123, org "dev"). Vite HMR still runs in-process. + DEV_DATA_ROOT="$REPO_ROOT/.lobu-dev" + export DATABASE_URL="file://${DEV_DATA_ROOT}" export PGSSLMODE=disable - mkdir -p "$LOBU_DATA_DIR" - echo "→ no DATABASE_URL set — booting embedded PGlite" + mkdir -p "$DEV_DATA_ROOT" + echo "→ no DATABASE_URL set — booting embedded Postgres" echo "→ server on http://${HOST}:${PORT} (Vite HMR in-process)" - echo "→ data dir: $LOBU_DATA_DIR" + echo "→ data dir: $DEV_DATA_ROOT/.lobu/pgdata" echo "→ first run seeds a web login: dev@lobu.local / lobudev123 (org 'dev')" echo "→ then run \`lobu apply\` from a project dir to sync its lobu.toml" echo "" diff --git a/scripts/e2e-lobu-apply.sh b/scripts/e2e-lobu-apply.sh index 27422739d..d59035b71 100755 --- a/scripts/e2e-lobu-apply.sh +++ b/scripts/e2e-lobu-apply.sh @@ -2,9 +2,9 @@ # # End-to-end harness for `lobu apply` v1. 
# -# Boots `start-local.ts` (auto-bootstraps an admin PAT on empty data dir), +# Boots the embedded server (auto-bootstraps an admin PAT on empty data dir), # drives the CLI through create → noop → update → drift, and asserts the -# round-trip against PGlite. +# round-trip against the local embedded Postgres. # # Idempotent: cleans up its own server, data dir, and project dir on exit. @@ -65,18 +65,15 @@ fi LOBU="node ${CLI_BIN}" # ─── 2. start server ─────────────────────────────────────────────────── -echo "==> step 2: start start-local.ts on :${PORT}" +echo "==> step 2: start the embedded server on :${PORT}" -# Unset DATABASE_URL — start-local.ts boots PGlite and writes its own -# socket URL into process.env. A pre-set DATABASE_URL would race with the -# socket bind. +# DATABASE_URL=file:// → server.ts boots an embedded Postgres rooted there +# (cluster at /.lobu/pgdata) and rewrites DATABASE_URL to the TCP URL. env \ - -u DATABASE_URL \ - LOBU_DATA_DIR="${DATA_DIR}" \ + DATABASE_URL="file://${DATA_DIR}" \ PORT="${PORT}" \ HOST=127.0.0.1 \ - PG_SOCKET_PORT=0 \ - bun run "${REPO_ROOT}/packages/server/src/start-local.ts" \ + bun run "${REPO_ROOT}/packages/server/src/server.ts" \ >"${SERVER_LOG}" 2>&1 & SERVER_PID=$! diff --git a/scripts/publish-packages.mjs b/scripts/publish-packages.mjs index 9d10dfbaa..790b30b20 100644 --- a/scripts/publish-packages.mjs +++ b/scripts/publish-packages.mjs @@ -24,6 +24,8 @@ const PACKAGES = [ { dir: "packages/connector-sdk", transform: rewriteWorkspaceRefs }, { dir: "packages/agent-worker", transform: rewriteWorkspaceRefs }, { dir: "packages/embeddings", transform: rewriteWorkspaceRefs }, + // Before cli — cli depends on @lobu/pgvector-embedded (workspace:*). 
+ { dir: "packages/pgvector-embedded", transform: rewriteWorkspaceRefs }, { dir: "packages/cli", transform: rewriteWorkspaceRefs }, { dir: "packages/openclaw-plugin", diff --git a/scripts/review.sh b/scripts/review.sh index 08e76943c..ace49ccf3 100755 --- a/scripts/review.sh +++ b/scripts/review.sh @@ -102,7 +102,14 @@ if [ -f .env ]; then . ./.env set +a fi -[ -n "${DATABASE_URL:-}" ] || { echo "DATABASE_URL not set." >&2; exit 2; } + +# Tests must NOT run against whatever DATABASE_URL .env points at (often a +# shared/tailnet DB) — they run DDL like `DROP SCHEMA public`. Unset it so the +# test harness spawns an isolated, ephemeral embedded Postgres per run (see +# packages/server/src/__tests__/setup/embedded-postgres-backend.ts). This also +# removes the old "ALTER SCHEMA public OWNER" hack: the embedded cluster's +# bootstrap role already owns its schema. +unset DATABASE_URL # --- build ------------------------------------------------------------------ # Tests need workspace packages built. Worktree's `dist/` may be stale or @@ -118,16 +125,6 @@ if [ $BUILD_EXIT -ne 0 ]; then echo "!! build failed (exit $BUILD_EXIT) — proceeding so pi can review the diff, but unit tests will likely fail" >&2 fi -# --- DB schema ownership ---------------------------------------------------- -# Postgres 15+ restricts CREATE on the `public` schema to its owner. Integration -# tests run DDL as the DATABASE_URL user — make them the schema owner so -# `setupTestDatabase` doesn't trip on "must be owner of schema public". - -DB_USER="$(printf '%s' "$DATABASE_URL" | sed -E 's|^postgres(ql)?://([^:@/]+).*|\2|')" -if [ -n "$DB_USER" ] && [ "$DB_USER" != "$DATABASE_URL" ]; then - psql "$DATABASE_URL" -tAc "ALTER SCHEMA public OWNER TO \"$DB_USER\"" >/dev/null 2>&1 || true -fi - # --- test suites ------------------------------------------------------------ TYPECHECK_LOG="/tmp/lobu-review-typecheck.log"