Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions db/migrations/20260517010000_drop_unused_indexes.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
-- migrate:up

-- Drop 4 indexes for which pg_stat_user_indexes reported `idx_scan = 0`
-- after 28h of prod uptime AND which are not referenced from any active
-- code path.
--
-- A larger set (4 more, ~5 GB combined) was originally bundled here but
-- review caught they're not actually unused — they're dormant. The three
-- big search indexes (`idx_events_embedding`, `idx_events_raw_content_trgm`,
-- `idx_events_search_tsv`) are explicitly used by the ANN/fulltext/trigram
-- branches of `approximate_candidate_search` in
-- `packages/server/src/utils/content-search.ts:1707-1733`. The `search()`
-- agent tool path threads through there; prod just hasn't called it in
-- 28h, but a single user-initiated search would now time out at 6s and
-- return empty results without those indexes (`content-search.ts:1850-1863`).
-- Similarly `idx_events_run_id` backs the "view in memory" filter
-- (`content-query-filters.ts:197-201`); rare, but a real path.
--
-- Keep those four until either (a) the dormant features are removed in
-- code, or (b) measured prod traffic confirms they're abandoned.
--
-- What remains is small but still real write amplification: every
-- INSERT into events has to update these btrees. Combined size ~66 MB —
-- modest reclaim, but zero downside since the underlying queries don't
-- exist anywhere in the codebase today (verified by grep).
--
-- Plain `DROP INDEX` (not CONCURRENTLY) is used because dbmate's
-- `transaction:false` directive doesn't actually exit the transaction
-- block against the `pq` driver — see the comment in
-- 20260426130001_db_integrity_cleanup_concurrent.sql. These 4 indexes
-- are all small btrees so the ACCESS EXCLUSIVE on `events` during the
-- drop is sub-second; no operator runbook needed.

-- Bound how long each DROP may wait for its ACCESS EXCLUSIVE lock on
-- `events`. The drop itself is fast, but without a timeout a DROP that
-- queues behind a long-running transaction makes every later reader and
-- writer of `events` queue behind it too. With the timeout the migration
-- fails fast instead and can simply be re-run.
-- SET LOCAL is scoped to the surrounding migration transaction; if this
-- file is ever run outside a transaction it degrades to a warning and
-- has no effect.
SET LOCAL lock_timeout = '5s';

DROP INDEX IF EXISTS public.idx_events_entity_ids_occurred_at;
DROP INDEX IF EXISTS public.idx_events_origin_parent_id;
DROP INDEX IF EXISTS public.idx_events_thread_lookup;
DROP INDEX IF EXISTS public.idx_events_type;

-- migrate:down

-- Rollback: rebuild the four btree indexes removed by the up migration,
-- with the same columns, ordering, and partial-index predicates they had
-- before the drop. IF NOT EXISTS keeps the rollback re-runnable.
-- NOTE(review): these are plain (non-CONCURRENTLY) builds, so writes to
-- `events` are blocked while each index is built — confirm that is
-- acceptable before rolling back in prod.

-- First entity id, newest first.
CREATE INDEX IF NOT EXISTS idx_events_entity_ids_occurred_at
    ON public.events
    USING btree ((entity_ids[1]), occurred_at DESC, id DESC)
    WHERE entity_ids IS NOT NULL
      AND entity_ids <> '{}'::bigint[];

-- Parent lookup over all rows.
CREATE INDEX IF NOT EXISTS idx_events_origin_parent_id
    ON public.events
    USING btree (origin_parent_id);

-- Parent + time, only for rows that have a parent.
CREATE INDEX IF NOT EXISTS idx_events_thread_lookup
    ON public.events
    USING btree (origin_parent_id, occurred_at)
    WHERE origin_parent_id IS NOT NULL;

-- Origin type, only for rows that have one.
CREATE INDEX IF NOT EXISTS idx_events_type
    ON public.events
    USING btree (origin_type)
    WHERE origin_type IS NOT NULL;
27 changes: 2 additions & 25 deletions db/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3403,12 +3403,6 @@ CREATE INDEX idx_events_embedding ON public.event_embeddings USING ivfflat (embe

CREATE INDEX idx_events_entity_ids ON public.events USING gin (entity_ids);

--
-- Name: idx_events_entity_ids_occurred_at; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX idx_events_entity_ids_occurred_at ON public.events USING btree ((entity_ids[1]), occurred_at DESC, id DESC) WHERE ((entity_ids IS NOT NULL) AND (entity_ids <> '{}'::bigint[]));

--
-- Name: idx_events_feed_id; Type: INDEX; Schema: public; Owner: -
--
Expand Down Expand Up @@ -3487,12 +3481,6 @@ CREATE INDEX idx_events_missing_embedding_backfill ON public.events USING btree

CREATE INDEX idx_events_organization_id ON public.events USING btree (organization_id) WHERE (organization_id IS NOT NULL);

--
-- Name: idx_events_origin_parent_id; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX idx_events_origin_parent_id ON public.events USING btree (origin_parent_id);

--
-- Name: idx_events_raw_content_trgm; Type: INDEX; Schema: public; Owner: -
--
Expand Down Expand Up @@ -3529,18 +3517,6 @@ CREATE INDEX idx_events_source_embedding ON public.event_embeddings USING btree

CREATE UNIQUE INDEX idx_events_superseded_by ON public.events USING btree (supersedes_event_id) WHERE (supersedes_event_id IS NOT NULL);

--
-- Name: idx_events_thread_lookup; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX idx_events_thread_lookup ON public.events USING btree (origin_parent_id, occurred_at) WHERE (origin_parent_id IS NOT NULL);

--
-- Name: idx_events_type; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX idx_events_type ON public.events USING btree (origin_type) WHERE (origin_type IS NOT NULL);

--
-- Name: idx_feeds_connection; Type: INDEX; Schema: public; Owner: -
--
Expand Down Expand Up @@ -5009,4 +4985,5 @@ INSERT INTO public.schema_migrations (version) VALUES
('20260515170000'),
('20260516120000'),
('20260516200000'),
('20260516200100');
('20260516200100'),
('20260517010000');
2 changes: 2 additions & 0 deletions docs/MIGRATIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ A single `DELETE FROM connections WHERE id IN (...)` triggers an internal `UPDAT

Indexing alone helps the cascade, but it doesn't eliminate the per-row WAL write; batching before the delete is what keeps the API responsive.

**Lobu-specific policy:** **connections are never hard-deleted in prod.** Setting `connections.deleted_at` is the final state. The `events_connection_id_fkey ... ON DELETE SET NULL` cascade exists in the schema for completeness, but actually invoking it (`DELETE FROM connections WHERE deleted_at IS NOT NULL`) blocks the API for ~13s per connection at current scale — the 2026-05-16 `pg_stat_statements` showed exactly this pattern at rank #8 (5 calls × 13.4s each). Soft-deleted connections cost ~50 bytes apiece in the `connections` table; the occasional accumulation isn't worth the recurring stall. The connection-creation rollback path in `tools/admin/manage_connections.ts` (which only deletes never-activated rows that have no events yet) is the only acceptable use of `DELETE FROM connections`.

### Bare `DROP INDEX`

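Takes `ACCESS EXCLUSIVE`. Use `DROP INDEX CONCURRENTLY` (also `transaction:false`). Caveat: `CONCURRENTLY` cannot run inside a transaction block, so where `transaction:false` is not actually honoured (see the comment in `20260426130001_db_integrity_cleanup_concurrent.sql`), a plain `DROP INDEX` is the fallback; restrict it to small indexes so the lock is held only briefly.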
Expand Down
8 changes: 7 additions & 1 deletion packages/server/src/tools/admin/manage_connections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,13 @@ async function handleList(
AND NOT (dw.id IS NOT NULL AND dw.last_seen_at > now() - interval '20 minutes')
THEN 'offline'
END AS device_status,
(SELECT COUNT(*) FROM current_event_records e WHERE e.connection_id = c.id)::int AS event_count,
-- event_count intentionally omitted from list responses: the
-- per-row correlated count via current_event_records does a
-- supersedes anti-join over the events table and was the dominant
-- cost in this query (1303ms mean → 2.3ms without it; see the
-- post-incident perf brainstorm). For the per-connection detail
-- page, handleGet below still computes it — that path is a single
-- row and costs ~1.2ms.
(SELECT COUNT(*) FROM feeds f WHERE f.connection_id = c.id AND f.deleted_at IS NULL)::int AS feed_count,
(SELECT ct.token FROM connect_tokens ct
WHERE ct.connection_id = c.id AND ct.status = 'pending' AND ct.expires_at > NOW()
Expand Down
2 changes: 1 addition & 1 deletion packages/web
Submodule web updated from c39010 to d95b9b
Loading