From 0d7407e8439b4aa74744a4573881a29ff3bde9c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Sun, 26 Apr 2026 23:45:37 +0100 Subject: [PATCH 1/3] feat(world-model): cross-org relationship_types + catalog discovery in search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the two BLOCKER gaps pi flagged after #374: 1. **Schema search path for entity_relationship_types** (`tools/admin/manage_entity.ts::handleLink`). Mirrors what #374 did for entity_types: tenant first, then any `visibility='public'` org. Tenant-local relationship types still win. Without this, even though entities can use public-catalog vocabulary, relationships couldn't — e.g. a tenant relating their `$member` to a canonical Apple Inc would have to register a local copy of `works_at`. 2. **Public-catalog discovery in `tools/search.ts`**. Adds an `include_public_catalogs` arg (defaults to true) so tenant agents can find canonical entities (HMRC, banks, currencies, …) by name/type without knowing entity ids upfront. Result rows already carry `organization_id`, so the agent can tell tenant-local from canonical hits. `fetchEntityById` widens the same way so an entity_id lookup following a search hit resolves cleanly. No DB migration. 
Tests: - `tools/__tests__/search-cross-org.test.ts` (3): public+tenant in one call; flag=false hides public; private orgs not snooped - `entity-relationships.test.ts`: tenant uses a `works-at-public` relationship_type defined in a public catalog org --- .../entity-relationships.test.ts | 33 +++++ .../tools/__tests__/search-cross-org.test.ts | 113 ++++++++++++++++++ .../src/tools/admin/manage_entity.ts | 17 ++- packages/owletto-backend/src/tools/search.ts | 27 ++++- 4 files changed, 185 insertions(+), 5 deletions(-) create mode 100644 packages/owletto-backend/src/tools/__tests__/search-cross-org.test.ts diff --git a/packages/owletto-backend/src/__tests__/integration/relationships/entity-relationships.test.ts b/packages/owletto-backend/src/__tests__/integration/relationships/entity-relationships.test.ts index efdf5424c..5dc6fc89f 100644 --- a/packages/owletto-backend/src/__tests__/integration/relationships/entity-relationships.test.ts +++ b/packages/owletto-backend/src/__tests__/integration/relationships/entity-relationships.test.ts @@ -606,6 +606,39 @@ describe('Entity Relationships', () => { expect(result.relationship.organization_id).toBe(orgA.id); }); + it('should resolve a relationship_type defined in a public-catalog org (cross-org type vocabulary)', async () => { + // Set up a public catalog with a canonical relationship type the + // tenant doesn't have locally. Mirrors how `works_at` would live in + // public-uk-finance. 
+ const publicCatalog = await createTestOrganization({ + name: 'Public Catalog Type', + visibility: 'public', + }); + const publicEntity = await createTestEntity({ + name: 'Canonical Co', + entity_type: 'brand', + organization_id: publicCatalog.id, + }); + const sql = getTestDb(); + await sql` + INSERT INTO entity_relationship_types (organization_id, slug, name, is_symmetric, created_at, updated_at) + VALUES (${publicCatalog.id}, 'works-at-public', 'Works At', false, current_timestamp, current_timestamp) + `; + + const result = await mcpToolsCall( + 'manage_entity', + { + action: 'link', + from_entity_id: entityA1.id, + to_entity_id: publicEntity.id, + relationship_type_slug: 'works-at-public', + }, + { token: tokenA } + ); + expect(result.action).toBe('link'); + expect(result.relationship.organization_id).toBe(orgA.id); + }); + it('should reject a relationship whose source is in a different org from the caller', async () => { // userA is signed in (tokenA → orgA), but the source entity is in orgB. // Even though tokenA's caller has access to read entityB1, they cannot diff --git a/packages/owletto-backend/src/tools/__tests__/search-cross-org.test.ts b/packages/owletto-backend/src/tools/__tests__/search-cross-org.test.ts new file mode 100644 index 000000000..702295095 --- /dev/null +++ b/packages/owletto-backend/src/tools/__tests__/search-cross-org.test.ts @@ -0,0 +1,113 @@ +/** + * Search tool surfaces public-catalog entities so tenant agents can discover + * canonical entities (HMRC, banks, currencies) without knowing their IDs + * upfront. Caller's-org entities still come back; public ones are added when + * the include_public_catalogs flag is on (default). 
+ */ + +import { beforeEach, describe, expect, it } from 'vitest'; +import { cleanupTestDatabase } from '../../__tests__/setup/test-db'; +import { + addUserToOrganization, + createTestEntity, + createTestOrganization, + createTestUser, +} from '../../__tests__/setup/test-fixtures'; +import { search } from '../search'; + +describe('search cross-org public catalog discovery', () => { + beforeEach(async () => { + await cleanupTestDatabase(); + }); + + it('returns matching entities from public-catalog orgs alongside tenant hits', async () => { + const tenant = await createTestOrganization({ name: 'Tenant Search' }); + const publicCatalog = await createTestOrganization({ + name: 'Public Catalog Search', + visibility: 'public', + }); + const user = await createTestUser(); + await addUserToOrganization(user.id, tenant.id, 'owner'); + + const tenantEntity = await createTestEntity({ + name: 'Apple Local', + entity_type: 'brand', + organization_id: tenant.id, + }); + const publicEntity = await createTestEntity({ + name: 'Apple Inc', + entity_type: 'brand', + organization_id: publicCatalog.id, + }); + + const result = await search( + { query: 'Apple', fuzzy: true, include_content: false }, + {} as Parameters[1], + { organizationId: tenant.id, userId: user.id } as Parameters[2] + ); + + const ids = result.entities.map((e: { id: number }) => e.id); + expect(ids).toContain(tenantEntity.id); + expect(ids).toContain(publicEntity.id); + }); + + it('omits public-catalog hits when include_public_catalogs=false', async () => { + const tenant = await createTestOrganization({ name: 'Tenant Local-Only' }); + const publicCatalog = await createTestOrganization({ + name: 'Public Catalog Local-Only', + visibility: 'public', + }); + const user = await createTestUser(); + await addUserToOrganization(user.id, tenant.id, 'owner'); + + await createTestEntity({ + name: 'Local Apple', + entity_type: 'brand', + organization_id: tenant.id, + }); + const publicEntity = await createTestEntity({ + name: 
'Public Apple', + entity_type: 'brand', + organization_id: publicCatalog.id, + }); + + const result = await search( + { + query: 'Apple', + fuzzy: true, + include_content: false, + include_public_catalogs: false, + }, + {} as Parameters[1], + { organizationId: tenant.id, userId: user.id } as Parameters[2] + ); + + const ids = result.entities.map((e: { id: number }) => e.id); + expect(ids).not.toContain(publicEntity.id); + }); + + it('does not surface entities from private orgs the caller is not in', async () => { + const tenant = await createTestOrganization({ name: 'Tenant No-Snoop Search' }); + const otherPrivate = await createTestOrganization({ + name: 'Some Other Private', + visibility: 'private', + }); + const user = await createTestUser(); + await addUserToOrganization(user.id, tenant.id, 'owner'); + + const privateEntity = await createTestEntity({ + name: 'Hidden Apple', + entity_type: 'brand', + organization_id: otherPrivate.id, + }); + + const result = await search( + { query: 'Apple', fuzzy: true, include_content: false, include_public_catalogs: true }, + {} as Parameters[1], + { organizationId: tenant.id, userId: user.id } as Parameters[2] + ); + + const ids = result.entities.map((e: { id: number }) => e.id); + expect(ids).not.toContain(privateEntity.id); + }); +}); diff --git a/packages/owletto-backend/src/tools/admin/manage_entity.ts b/packages/owletto-backend/src/tools/admin/manage_entity.ts index 6d5c4c4c9..d0d2fe412 100644 --- a/packages/owletto-backend/src/tools/admin/manage_entity.ts +++ b/packages/owletto-backend/src/tools/admin/manage_entity.ts @@ -917,9 +917,22 @@ async function handleLink( validateNoSelfReference(args.from_entity_id, args.to_entity_id); await validateScopeRule(args.from_entity_id, args.to_entity_id, env, ctx); + // Schema search path for relationship types: tenant first, then any + // visibility='public' catalog. 
Mirrors createEntity's resolver so a tenant + // can use a canonical relationship type like `works_at` defined in + // public-uk-finance without registering a local copy. Tenant-local types + // win when both exist. const typeRows = await sql` - SELECT id, is_symmetric FROM entity_relationship_types - WHERE slug = ${args.relationship_type_slug} AND organization_id = ${ctx.organizationId} AND deleted_at IS NULL + SELECT rt.id, rt.is_symmetric + FROM entity_relationship_types rt + LEFT JOIN organization o ON o.id = rt.organization_id + WHERE rt.slug = ${args.relationship_type_slug} + AND rt.deleted_at IS NULL + AND ( + rt.organization_id = ${ctx.organizationId} + OR o.visibility = 'public' + ) + ORDER BY (rt.organization_id = ${ctx.organizationId}) DESC, rt.id ASC LIMIT 1 `; if (typeRows.length === 0) { diff --git a/packages/owletto-backend/src/tools/search.ts b/packages/owletto-backend/src/tools/search.ts index 1517e4f21..8ca74f9fd 100644 --- a/packages/owletto-backend/src/tools/search.ts +++ b/packages/owletto-backend/src/tools/search.ts @@ -108,6 +108,13 @@ export const SearchSchema = Type.Object({ maximum: 100, }) ), + include_public_catalogs: Type.Optional( + Type.Boolean({ + description: + 'Also search public-catalog orgs (visibility=public) — canonical world entities like HMRC, banks, currencies. Defaults to true so agents can discover entities to reference cross-org.', + default: true, + }) + ), }); type SearchArgs = Static; @@ -509,8 +516,19 @@ async function queryEntities( conditions.push('e.embedding IS NOT NULL'); } - // Organization filter - conditions.push(`e.organization_id = $${addParam(organizationId)}`); + // Organization filter — caller's org always; public-catalog orgs when the + // flag is on (default), so an agent looking up "Apple" finds tenant-local + // and canonical hits in one call. The result row carries the org_id so the + // agent can tell which is which. + const includePublic = args.include_public_catalogs ?? 
true; + if (includePublic) { + const orgParamIdx = addParam(organizationId); + conditions.push( + `(e.organization_id = $${orgParamIdx} OR EXISTS (SELECT 1 FROM organization o WHERE o.id = e.organization_id AND o.visibility = 'public'))` + ); + } else { + conditions.push(`e.organization_id = $${addParam(organizationId)}`); + } if (args.entity_type) conditions.push(`et.slug = $${addParam(args.entity_type)}`); if (args.parent_id) conditions.push(`e.parent_id = $${addParam(args.parent_id)}`); @@ -578,11 +596,14 @@ async function queryEntities( async function fetchEntityById(entityId: number, _env: Env, organizationId: string) { const sql = getDb(); + // Caller's org or any visibility=public catalog. Lets entity_id lookup find + // canonical entities (HMRC, banks) the agent has discovered via search. const result = await sql.unsafe( `SELECT ${ENTITY_SELECT_COLUMNS} ${ENTITY_JOINS} + LEFT JOIN organization eo ON eo.id = e.organization_id WHERE e.id = $1 - AND e.organization_id = $2 + AND (e.organization_id = $2 OR eo.visibility = 'public') AND e.deleted_at IS NULL`, [entityId, organizationId] ); From e222fafc952d9f83a17fc6f13b63ec567dc98d0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Sun, 26 Apr 2026 23:50:28 +0100 Subject: [PATCH 2/3] fix(cross-org-fixes): close privacy leaks in cross-org search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pi flagged two BLOCKERS in the previous round: 1. **Connection metadata leak.** `formatEntityResult` calls `fetchConnectionsForEntity(primaryEntity.id)` with no caller-org scope. For a public-catalog entity referenced by multiple tenants, any tenant searching that entity would receive other tenants' connection display names, configs, and feed entity names. Now skipped when the primary entity is in a different org from the caller — connections are tenant operational data, never canonical. 2. 
**Cross-tenant stat side channel.** Count subqueries in the SELECT (content_count, connection_count, watcher_count, children_count) were computed globally for the entity id; for public-catalog entities referenced from many tenants, this leaks aggregate activity volumes. Now gated `CASE WHEN e.organization_id = $callerOrg THEN ... ELSE 0 END` for each count, so cross-org rows return zeros for operational stats. Children query also scoped to primary's own org. Also addressing IMPORTANT #3: tenant-local results were getting pushed out by high-scoring public matches. ORDER BY now `(e.organization_id = $caller) DESC, match_score DESC` so caller-org rows always sort ahead of public-catalog rows and match_score only orders rows within each group. --- packages/owletto-backend/src/tools/search.ts | 101 +++++++++++++------ 1 file changed, 70 insertions(+), 31 deletions(-) diff --git a/packages/owletto-backend/src/tools/search.ts b/packages/owletto-backend/src/tools/search.ts index 8ca74f9fd..71d9853cd 100644 --- a/packages/owletto-backend/src/tools/search.ts +++ b/packages/owletto-backend/src/tools/search.ts @@ -435,29 +435,48 @@ async function fetchTopEntitiesByType( // Query Helper Functions // ============================================ -const ENTITY_SELECT_COLUMNS = ` +// Build the entity SELECT projection. The count subqueries (events, +// connections, watchers, children) are tenant-private operational data: +// running them globally for a public-catalog entity would leak other +// tenants' activity volumes through aggregate counts. Each count is +// gated on `e.organization_id = $callerOrg` so we return zeros for +// cross-org rows. Caller passes the parameter index for their org. 
+function entitySelectColumns(callerOrgParamIdx: number): string { + const ownOrg = `e.organization_id = $${callerOrgParamIdx}`; + return ` e.id, e.organization_id, e.name, et.slug AS entity_type, e.slug, e.metadata, e.parent_id, pe.name as parent_name, pe.slug as parent_slug, pet.slug as parent_entity_type, - COALESCE((SELECT COUNT(*) FROM current_event_records ev WHERE ${entityLinkMatchSql('e.id::bigint', 'ev')}), 0) as content_count, - COALESCE(( - SELECT COUNT(DISTINCT cn.connector_key) - FROM feeds f - JOIN connections cn ON cn.id = f.connection_id - WHERE e.id = ANY(f.entity_ids) - AND f.deleted_at IS NULL - AND cn.deleted_at IS NULL - ), 0) as connection_count, - COALESCE(( - SELECT COUNT(DISTINCT cn.connector_key) - FROM feeds f - JOIN connections cn ON cn.id = f.connection_id - WHERE e.id = ANY(f.entity_ids) - AND f.deleted_at IS NULL - AND cn.deleted_at IS NULL - AND cn.status = 'active' - ), 0) as active_connection_count, - COALESCE((SELECT COUNT(*) FROM entities c WHERE c.parent_id = e.id), 0) as children_count, - COALESCE((SELECT COUNT(*) FROM watchers i WHERE e.id = ANY(i.entity_ids)), 0) as watcher_count`; + CASE WHEN ${ownOrg} THEN + COALESCE((SELECT COUNT(*) FROM current_event_records ev WHERE ${entityLinkMatchSql('e.id::bigint', 'ev')}), 0) + ELSE 0 END as content_count, + CASE WHEN ${ownOrg} THEN + COALESCE(( + SELECT COUNT(DISTINCT cn.connector_key) + FROM feeds f + JOIN connections cn ON cn.id = f.connection_id + WHERE e.id = ANY(f.entity_ids) + AND f.deleted_at IS NULL + AND cn.deleted_at IS NULL + ), 0) + ELSE 0 END as connection_count, + CASE WHEN ${ownOrg} THEN + COALESCE(( + SELECT COUNT(DISTINCT cn.connector_key) + FROM feeds f + JOIN connections cn ON cn.id = f.connection_id + WHERE e.id = ANY(f.entity_ids) + AND f.deleted_at IS NULL + AND cn.deleted_at IS NULL + AND cn.status = 'active' + ), 0) + ELSE 0 END as active_connection_count, + CASE WHEN ${ownOrg} THEN + COALESCE((SELECT COUNT(*) FROM entities c WHERE c.parent_id = e.id AND 
c.organization_id = e.organization_id), 0) + ELSE 0 END as children_count, + CASE WHEN ${ownOrg} THEN + COALESCE((SELECT COUNT(*) FROM watchers i WHERE e.id = ANY(i.entity_ids) AND i.organization_id = e.organization_id), 0) + ELSE 0 END as watcher_count`; +} const ENTITY_JOINS = ` FROM entities e @@ -519,15 +538,18 @@ async function queryEntities( // Organization filter — caller's org always; public-catalog orgs when the // flag is on (default), so an agent looking up "Apple" finds tenant-local // and canonical hits in one call. The result row carries the org_id so the - // agent can tell which is which. + // agent can tell which is which. The same param index is reused by the + // count subqueries in entitySelectColumns(orgParamIdx), which gate + // operational counts (events, connections, watchers) on caller-org rows + // so cross-org public results don't leak other tenants' activity. const includePublic = args.include_public_catalogs ?? true; + const orgParamIdx = addParam(organizationId); if (includePublic) { - const orgParamIdx = addParam(organizationId); conditions.push( `(e.organization_id = $${orgParamIdx} OR EXISTS (SELECT 1 FROM organization o WHERE o.id = e.organization_id AND o.visibility = 'public'))` ); } else { - conditions.push(`e.organization_id = $${addParam(organizationId)}`); + conditions.push(`e.organization_id = $${orgParamIdx}`); } if (args.entity_type) conditions.push(`et.slug = $${addParam(args.entity_type)}`); @@ -577,13 +599,13 @@ async function queryEntities( } const rows = await sql.unsafe( - `SELECT ${ENTITY_SELECT_COLUMNS}, + `SELECT ${entitySelectColumns(orgParamIdx)}, ${scoreExpr} as match_score, '${matchReason}' as match_reason, ${vectorSimExpr} as vector_similarity ${ENTITY_JOINS} WHERE ${whereClause} - ORDER BY match_score DESC + ORDER BY (e.organization_id = $${orgParamIdx}) DESC, match_score DESC LIMIT ${limit}`, params ); @@ -598,8 +620,10 @@ async function fetchEntityById(entityId: number, _env: Env, organizationId: stri // 
Caller's org or any visibility=public catalog. Lets entity_id lookup find // canonical entities (HMRC, banks) the agent has discovered via search. + // Operational counts (events, connections, watchers) are gated on + // caller-org so cross-org public hits don't leak other tenants' activity. const result = await sql.unsafe( - `SELECT ${ENTITY_SELECT_COLUMNS} + `SELECT ${entitySelectColumns(2)} ${ENTITY_JOINS} LEFT JOIN organization eo ON eo.id = e.organization_id WHERE e.id = $1 @@ -648,16 +672,28 @@ async function formatEntityResult( const baseUrl = getPublicWebUrl(ctx.requestUrl, ctx.baseUrl); const primaryEntity = matches[0]; + const primaryRow = entityRows[0]; const entityType = primaryEntity.type; const isRootEntity = !primaryEntity.parent_id; - // Fetch connections if requested (default: true) + // Fetch connections if requested (default: true). Public-catalog entities + // are referenced by many tenants; running fetchConnectionsForEntity on + // them would surface other tenants' private connection metadata + // (display_name, config, feed entity names). Connections are per-tenant + // operational data, never canonical, so skip them entirely for cross-org + // public results. let connections: ConnectionInfo[] | undefined; - if (args.include_connections ?? true) { + const primaryIsCallerOrg = + String(primaryRow.organization_id) === ctx.organizationId; + if ((args.include_connections ?? true) && primaryIsCallerOrg) { connections = await fetchConnectionsForEntity(primaryEntity.id); } - // Fetch children for root entities (no parent) + // Fetch children for root entities (no parent). Children are scoped to + // the primary's own org — preserves the parent-org boundary and stops + // tenant-private "child of HMRC"-style rows from leaking when the primary + // is a cross-org public entity. content_count likewise scoped to that + // entity's own org so we don't aggregate other tenants' activity. 
let children: UnifiedSearchResult['children']; if (isRootEntity) { const childRows = await getDb()` @@ -667,12 +703,15 @@ async function formatEntityResult( et.slug AS entity_type, e.metadata::jsonb->>'market' as market, COALESCE( - (SELECT COUNT(*) FROM current_event_records WHERE e.id = ANY(entity_ids)), + (SELECT COUNT(*) FROM current_event_records ev + WHERE e.id = ANY(ev.entity_ids) + AND ev.organization_id = e.organization_id), 0 ) as content_count FROM entities e JOIN entity_types et ON et.id = e.entity_type_id WHERE e.parent_id = ${primaryEntity.id} + AND e.organization_id = ${primaryRow.organization_id} ORDER BY e.created_at DESC `; children = childRows.map((row) => ({ From 89f2ec50dd0088c2b1ed3298e2ddefd2462f6204 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Sun, 26 Apr 2026 23:51:20 +0100 Subject: [PATCH 3/3] fix(cross-org-fixes): zero out children content_count for cross-org primaries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pi follow-up: maintains the 'operational counts are zero for cross-org' invariant consistently — children of a public-catalog primary now show content_count=0 to match the primary's own zeroed stats. --- packages/owletto-backend/src/tools/search.ts | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/packages/owletto-backend/src/tools/search.ts b/packages/owletto-backend/src/tools/search.ts index 71d9853cd..85ab7a9d2 100644 --- a/packages/owletto-backend/src/tools/search.ts +++ b/packages/owletto-backend/src/tools/search.ts @@ -692,8 +692,8 @@ async function formatEntityResult( // Fetch children for root entities (no parent). Children are scoped to // the primary's own org — preserves the parent-org boundary and stops // tenant-private "child of HMRC"-style rows from leaking when the primary - // is a cross-org public entity. 
content_count likewise scoped to that - // entity's own org so we don't aggregate other tenants' activity. + // is a cross-org public entity. content_count is zeroed for cross-org + // primaries to match the same invariant the parent's stats follow. let children: UnifiedSearchResult['children']; if (isRootEntity) { const childRows = await getDb()` @@ -702,12 +702,14 @@ async function formatEntityResult( e.name, et.slug AS entity_type, e.metadata::jsonb->>'market' as market, - COALESCE( - (SELECT COUNT(*) FROM current_event_records ev - WHERE e.id = ANY(ev.entity_ids) - AND ev.organization_id = e.organization_id), - 0 - ) as content_count + CASE WHEN ${primaryIsCallerOrg} THEN + COALESCE( + (SELECT COUNT(*) FROM current_event_records ev + WHERE e.id = ANY(ev.entity_ids) + AND ev.organization_id = e.organization_id), + 0 + ) + ELSE 0 END as content_count FROM entities e JOIN entity_types et ON et.id = e.entity_type_id WHERE e.parent_id = ${primaryEntity.id}