lobu-ai · buremba · May 16, 2026 · May 16, 2026 · May 16, 2026 · May 16, 2026
diff --git a/db/migrations/20260517010000_drop_unused_indexes.sql b/db/migrations/20260517010000_drop_unused_indexes.sql
@@ -0,0 +1,49 @@
+-- migrate:up
+
+-- Drop 4 indexes for which pg_stat_user_indexes reported `idx_scan = 0` after
+-- 28h of prod uptime AND which are not referenced from any active code path.
+--
+-- A larger set (4 more, ~5 GB combined) was originally bundled here but
+-- review caught they're not actually unused — they're dormant. The three
+-- big search indexes (`idx_events_embedding`, `idx_events_raw_content_trgm`,
+-- `idx_events_search_tsv`) are explicitly used by the ANN/fulltext/trigram
+-- branches of `approximate_candidate_search` in
+-- `packages/server/src/utils/content-search.ts:1707-1733`. The `search()`
+-- agent tool path threads through there; prod just hasn't called it in
+-- 28h, but a single user-initiated search would now time out at 6s and
+-- return empty results without those indexes (`content-search.ts:1850-1863`).
+-- Similarly `idx_events_run_id` backs the "view in memory" filter
+-- (`content-query-filters.ts:197-201`); rare, but a real path.
+--
+-- Keep those four until either (a) the dormant features are removed in
+-- code, or (b) measured prod traffic confirms they're abandoned.
+--
+-- What remains is small but still real write amplification: every
+-- INSERT into events has to maintain these btrees. Combined size ~66 MB —
+-- modest reclaim, but zero downside since the underlying queries don't
+-- exist anywhere in the codebase today (verified by grep).
+--
+-- Plain `DROP INDEX` (not CONCURRENTLY) is used because dbmate's
+-- `transaction:false` directive doesn't actually exit the transaction
+-- block against the `pq` driver — see the comment in
+-- 20260426130001_db_integrity_cleanup_concurrent.sql. Dropping 4 small btrees
+-- is sub-second, but acquiring ACCESS EXCLUSIVE on `events` can queue behind a
+-- long-running query and stall all traffic behind it: fail fast and re-run.
+SET LOCAL lock_timeout = '5s';
+DROP INDEX IF EXISTS public.idx_events_entity_ids_occurred_at;
+DROP INDEX IF EXISTS public.idx_events_origin_parent_id;
+DROP INDEX IF EXISTS public.idx_events_thread_lookup;
+DROP INDEX IF EXISTS public.idx_events_type;
+
+-- migrate:down
+
+-- Rebuild the four dropped indexes with the definitions db/schema.sql had.
+-- NOTE: a plain CREATE INDEX blocks writes to `events` while each one builds.
+CREATE INDEX IF NOT EXISTS idx_events_entity_ids_occurred_at
+    ON public.events ((entity_ids[1]), occurred_at DESC, id DESC)
+    WHERE entity_ids IS NOT NULL AND entity_ids <> '{}'::bigint[];
+CREATE INDEX IF NOT EXISTS idx_events_origin_parent_id ON public.events (origin_parent_id);
+CREATE INDEX IF NOT EXISTS idx_events_thread_lookup
+    ON public.events (origin_parent_id, occurred_at) WHERE origin_parent_id IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_events_type
+    ON public.events (origin_type) WHERE origin_type IS NOT NULL;
diff --git a/db/schema.sql b/db/schema.sql
@@ -3403,12 +3403,6 @@ CREATE INDEX idx_events_embedding ON public.event_embeddings USING ivfflat (embe
 
 CREATE INDEX idx_events_entity_ids ON public.events USING gin (entity_ids);
 
---
--- Name: idx_events_entity_ids_occurred_at; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX idx_events_entity_ids_occurred_at ON public.events USING btree ((entity_ids[1]), occurred_at DESC, id DESC) WHERE ((entity_ids IS NOT NULL) AND (entity_ids <> '{}'::bigint[]));
-
 --
 -- Name: idx_events_feed_id; Type: INDEX; Schema: public; Owner: -
 --
@@ -3487,12 +3481,6 @@ CREATE INDEX idx_events_missing_embedding_backfill ON public.events USING btree
 
 CREATE INDEX idx_events_organization_id ON public.events USING btree (organization_id) WHERE (organization_id IS NOT NULL);
 
---
--- Name: idx_events_origin_parent_id; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX idx_events_origin_parent_id ON public.events USING btree (origin_parent_id);
-
 --
 -- Name: idx_events_raw_content_trgm; Type: INDEX; Schema: public; Owner: -
 --
@@ -3529,18 +3517,6 @@ CREATE INDEX idx_events_source_embedding ON public.event_embeddings USING btree
 
 CREATE UNIQUE INDEX idx_events_superseded_by ON public.events USING btree (supersedes_event_id) WHERE (supersedes_event_id IS NOT NULL);
 
---
--- Name: idx_events_thread_lookup; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX idx_events_thread_lookup ON public.events USING btree (origin_parent_id, occurred_at) WHERE (origin_parent_id IS NOT NULL);
-
---
--- Name: idx_events_type; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX idx_events_type ON public.events USING btree (origin_type) WHERE (origin_type IS NOT NULL);
-
 --
 -- Name: idx_feeds_connection; Type: INDEX; Schema: public; Owner: -
 --
@@ -5009,4 +4985,5 @@ INSERT INTO public.schema_migrations (version) VALUES
     ('20260515170000'),
     ('20260516120000'),
     ('20260516200000'),
-    ('20260516200100');
+    ('20260516200100'),
+    ('20260517010000');
diff --git a/docs/MIGRATIONS.md b/docs/MIGRATIONS.md
@@ -112,6 +112,8 @@ A single `DELETE FROM connections WHERE id IN (...)` triggers an internal `UPDAT
 
 Indexing alone helps the cascade, but it doesn't eliminate the per-row WAL write; batching before the delete is what keeps the API responsive.
 
+**Lobu-specific policy:** **connections are never hard-deleted in prod.** Setting `connections.deleted_at` is the final state. The `events_connection_id_fkey ... ON DELETE SET NULL` cascade exists in the schema for completeness, but actually invoking it (`DELETE FROM connections WHERE deleted_at IS NOT NULL`) blocks the API for ~13s per connection at current scale — the 2026-05-16 `pg_stat_statements` showed exactly this pattern at rank #8 (5 calls × 13.4s each). Soft-deleted connections cost ~50 bytes apiece in the `connections` table; the occasional accumulation isn't worth the recurring stall. The one exception is the connection-creation rollback path in `tools/admin/manage_connections.ts`: it only deletes never-activated rows that have no events yet, so the cascade has nothing to update, and it is the only acceptable use of `DELETE FROM connections`.
+
 ### Bare `DROP INDEX`
 
 Takes `ACCESS EXCLUSIVE`. Use `DROP INDEX CONCURRENTLY` (also `transaction:false`).

diff --git a/packages/server/src/tools/admin/manage_connections.ts b/packages/server/src/tools/admin/manage_connections.ts
@@ -539,7 +539,13 @@ async function handleList(
               AND NOT (dw.id IS NOT NULL AND dw.last_seen_at > now() - interval '20 minutes')
              THEN 'offline'
            END AS device_status,
-           (SELECT COUNT(*) FROM current_event_records e WHERE e.connection_id = c.id)::int AS event_count,
+           -- event_count intentionally omitted from list responses: the
+           -- per-row correlated count via current_event_records does a
+           -- supersedes anti-join over the events table and was the dominant
+           -- cost in this query (1303ms mean → 2.3ms without it; see the
+           -- post-incident perf brainstorm). For the per-connection detail
+           -- page, handleGet below still computes it — that path is a single
+           -- row and costs ~1.2ms.
            (SELECT COUNT(*) FROM feeds f WHERE f.connection_id = c.id AND f.deleted_at IS NULL)::int AS feed_count,
            (SELECT ct.token FROM connect_tokens ct
             WHERE ct.connection_id = c.id AND ct.status = 'pending' AND ct.expires_at > NOW()

diff --git a/packages/web b/packages/web