Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,39 @@ describe('Entity Relationships', () => {
expect(result.relationship.organization_id).toBe(orgA.id);
});

it('should resolve a relationship_type defined in a public-catalog org (cross-org type vocabulary)', async () => {
  // Arrange: a visibility='public' org that owns both the target entity and
  // the relationship type. The 'works-at-public' row is inserted only under
  // the catalog org here, so the link below can only succeed if the type
  // lookup reaches beyond the caller's own organization.
  const catalogOrg = await createTestOrganization({
    name: 'Public Catalog Type',
    visibility: 'public',
  });
  const canonicalEntity = await createTestEntity({
    name: 'Canonical Co',
    entity_type: 'brand',
    organization_id: catalogOrg.id,
  });

  const db = getTestDb();
  await db`
    INSERT INTO entity_relationship_types (organization_id, slug, name, is_symmetric, created_at, updated_at)
    VALUES (${catalogOrg.id}, 'works-at-public', 'Works At', false, current_timestamp, current_timestamp)
  `;

  // Act: link an orgA entity to the catalog entity, using tokenA.
  const linked = await mcpToolsCall(
    'manage_entity',
    {
      action: 'link',
      from_entity_id: entityA1.id,
      to_entity_id: canonicalEntity.id,
      relationship_type_slug: 'works-at-public',
    },
    { token: tokenA }
  );

  // Assert: the link succeeds and the relationship row is recorded under
  // orgA, not under the catalog org that supplied the type.
  expect(linked.action).toBe('link');
  expect(linked.relationship.organization_id).toBe(orgA.id);
});

it('should reject a relationship whose source is in a different org from the caller', async () => {
// userA is signed in (tokenA → orgA), but the source entity is in orgB.
// Even though tokenA's caller has access to read entityB1, they cannot
Expand Down
113 changes: 113 additions & 0 deletions packages/owletto-backend/src/tools/__tests__/search-cross-org.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/**
* Search tool surfaces public-catalog entities so tenant agents can discover
* canonical entities (HMRC, banks, currencies) without knowing their IDs
* upfront. Caller's-org entities still come back; public ones are added when
* the include_public_catalogs flag is on (default).
*/

import { beforeEach, describe, expect, it } from 'vitest';
import { cleanupTestDatabase } from '../../__tests__/setup/test-db';
import {
addUserToOrganization,
createTestEntity,
createTestOrganization,
createTestUser,
} from '../../__tests__/setup/test-fixtures';
import { search } from '../search';

describe('search cross-org public catalog discovery', () => {
  type SearchInput = Parameters<typeof search>[0];
  type TestOrg = Awaited<ReturnType<typeof createTestOrganization>>;
  type TestUser = Awaited<ReturnType<typeof createTestUser>>;

  beforeEach(async () => {
    await cleanupTestDatabase();
  });

  /** Creates a tenant organization plus a user who is its owner. */
  async function createTenantWithOwner(
    name: string
  ): Promise<{ tenant: TestOrg; user: TestUser }> {
    const tenant = await createTestOrganization({ name });
    const user = await createTestUser();
    await addUserToOrganization(user.id, tenant.id, 'owner');
    return { tenant, user };
  }

  /**
   * Runs `search` as `user` acting inside `tenant` and returns the ids of the
   * entities that came back. Env is an empty stub and the context carries
   * only the organization and user ids.
   */
  async function searchEntityIds(
    args: SearchInput,
    tenant: TestOrg,
    user: TestUser
  ): Promise<number[]> {
    const result = await search(
      args,
      {} as Parameters<typeof search>[1],
      { organizationId: tenant.id, userId: user.id } as Parameters<typeof search>[2]
    );
    return result.entities.map((e: { id: number }) => e.id);
  }

  it('returns matching entities from public-catalog orgs alongside tenant hits', async () => {
    const { tenant, user } = await createTenantWithOwner('Tenant Search');
    const publicCatalog = await createTestOrganization({
      name: 'Public Catalog Search',
      visibility: 'public',
    });

    const tenantEntity = await createTestEntity({
      name: 'Apple Local',
      entity_type: 'brand',
      organization_id: tenant.id,
    });
    const publicEntity = await createTestEntity({
      name: 'Apple Inc',
      entity_type: 'brand',
      organization_id: publicCatalog.id,
    });

    const ids = await searchEntityIds(
      { query: 'Apple', fuzzy: true, include_content: false },
      tenant,
      user
    );

    expect(ids).toContain(tenantEntity.id);
    expect(ids).toContain(publicEntity.id);
  });

  it('omits public-catalog hits when include_public_catalogs=false', async () => {
    const { tenant, user } = await createTenantWithOwner('Tenant Local-Only');
    const publicCatalog = await createTestOrganization({
      name: 'Public Catalog Local-Only',
      visibility: 'public',
    });

    const tenantEntity = await createTestEntity({
      name: 'Local Apple',
      entity_type: 'brand',
      organization_id: tenant.id,
    });
    const publicEntity = await createTestEntity({
      name: 'Public Apple',
      entity_type: 'brand',
      organization_id: publicCatalog.id,
    });

    const ids = await searchEntityIds(
      {
        query: 'Apple',
        fuzzy: true,
        include_content: false,
        include_public_catalogs: false,
      },
      tenant,
      user
    );

    // Positive control: without it the negative assertion below would also
    // pass if the search simply returned nothing.
    expect(ids).toContain(tenantEntity.id);
    expect(ids).not.toContain(publicEntity.id);
  });

  it('does not surface entities from private orgs the caller is not in', async () => {
    const { tenant, user } = await createTenantWithOwner('Tenant No-Snoop Search');
    const otherPrivate = await createTestOrganization({
      name: 'Some Other Private',
      visibility: 'private',
    });

    // Caller-org entity used as a positive control so the isolation check
    // below is not vacuously true on an empty result set.
    const tenantEntity = await createTestEntity({
      name: 'Apple Tenant',
      entity_type: 'brand',
      organization_id: tenant.id,
    });
    const privateEntity = await createTestEntity({
      name: 'Hidden Apple',
      entity_type: 'brand',
      organization_id: otherPrivate.id,
    });

    const ids = await searchEntityIds(
      { query: 'Apple', fuzzy: true, include_content: false, include_public_catalogs: true },
      tenant,
      user
    );

    expect(ids).toContain(tenantEntity.id);
    expect(ids).not.toContain(privateEntity.id);
  });
});
17 changes: 15 additions & 2 deletions packages/owletto-backend/src/tools/admin/manage_entity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -917,9 +917,22 @@ async function handleLink(
validateNoSelfReference(args.from_entity_id, args.to_entity_id);
await validateScopeRule(args.from_entity_id, args.to_entity_id, env, ctx);

// Schema search path for relationship types: tenant first, then any
// visibility='public' catalog. Mirrors createEntity's resolver so a tenant
// can use a canonical relationship type like `works_at` defined in
// public-uk-finance without registering a local copy. Tenant-local types
// win when both exist.
const typeRows = await sql`
SELECT id, is_symmetric FROM entity_relationship_types
WHERE slug = ${args.relationship_type_slug} AND organization_id = ${ctx.organizationId} AND deleted_at IS NULL
SELECT rt.id, rt.is_symmetric
FROM entity_relationship_types rt
LEFT JOIN organization o ON o.id = rt.organization_id
WHERE rt.slug = ${args.relationship_type_slug}
AND rt.deleted_at IS NULL
AND (
rt.organization_id = ${ctx.organizationId}
OR o.visibility = 'public'
)
ORDER BY (rt.organization_id = ${ctx.organizationId}) DESC, rt.id ASC
LIMIT 1
`;
if (typeRows.length === 0) {
Expand Down
130 changes: 96 additions & 34 deletions packages/owletto-backend/src/tools/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ export const SearchSchema = Type.Object({
maximum: 100,
})
),
include_public_catalogs: Type.Optional(
Type.Boolean({
description:
'Also search public-catalog orgs (visibility=public) — canonical world entities like HMRC, banks, currencies. Defaults to true so agents can discover entities to reference cross-org.',
default: true,
})
),
});

type SearchArgs = Static<typeof SearchSchema>;
Expand Down Expand Up @@ -428,29 +435,48 @@ async function fetchTopEntitiesByType(
// Query Helper Functions
// ============================================

const ENTITY_SELECT_COLUMNS = `
// Build the entity SELECT projection. The count subqueries (events,
// connections, watchers, children) are tenant-private operational data:
// running them globally for a public-catalog entity would leak other
// tenants' activity volumes through aggregate counts. Each count is
// gated on `e.organization_id = $callerOrg` so we return zeros for
// cross-org rows. Caller passes the parameter index for their org.
function entitySelectColumns(callerOrgParamIdx: number): string {
const ownOrg = `e.organization_id = $${callerOrgParamIdx}`;
return `
e.id, e.organization_id, e.name, et.slug AS entity_type, e.slug, e.metadata, e.parent_id,
pe.name as parent_name, pe.slug as parent_slug, pet.slug as parent_entity_type,
COALESCE((SELECT COUNT(*) FROM current_event_records ev WHERE ${entityLinkMatchSql('e.id::bigint', 'ev')}), 0) as content_count,
COALESCE((
SELECT COUNT(DISTINCT cn.connector_key)
FROM feeds f
JOIN connections cn ON cn.id = f.connection_id
WHERE e.id = ANY(f.entity_ids)
AND f.deleted_at IS NULL
AND cn.deleted_at IS NULL
), 0) as connection_count,
COALESCE((
SELECT COUNT(DISTINCT cn.connector_key)
FROM feeds f
JOIN connections cn ON cn.id = f.connection_id
WHERE e.id = ANY(f.entity_ids)
AND f.deleted_at IS NULL
AND cn.deleted_at IS NULL
AND cn.status = 'active'
), 0) as active_connection_count,
COALESCE((SELECT COUNT(*) FROM entities c WHERE c.parent_id = e.id), 0) as children_count,
COALESCE((SELECT COUNT(*) FROM watchers i WHERE e.id = ANY(i.entity_ids)), 0) as watcher_count`;
CASE WHEN ${ownOrg} THEN
COALESCE((SELECT COUNT(*) FROM current_event_records ev WHERE ${entityLinkMatchSql('e.id::bigint', 'ev')}), 0)
ELSE 0 END as content_count,
CASE WHEN ${ownOrg} THEN
COALESCE((
SELECT COUNT(DISTINCT cn.connector_key)
FROM feeds f
JOIN connections cn ON cn.id = f.connection_id
WHERE e.id = ANY(f.entity_ids)
AND f.deleted_at IS NULL
AND cn.deleted_at IS NULL
), 0)
ELSE 0 END as connection_count,
CASE WHEN ${ownOrg} THEN
COALESCE((
SELECT COUNT(DISTINCT cn.connector_key)
FROM feeds f
JOIN connections cn ON cn.id = f.connection_id
WHERE e.id = ANY(f.entity_ids)
AND f.deleted_at IS NULL
AND cn.deleted_at IS NULL
AND cn.status = 'active'
), 0)
ELSE 0 END as active_connection_count,
CASE WHEN ${ownOrg} THEN
COALESCE((SELECT COUNT(*) FROM entities c WHERE c.parent_id = e.id AND c.organization_id = e.organization_id), 0)
ELSE 0 END as children_count,
CASE WHEN ${ownOrg} THEN
COALESCE((SELECT COUNT(*) FROM watchers i WHERE e.id = ANY(i.entity_ids) AND i.organization_id = e.organization_id), 0)
ELSE 0 END as watcher_count`;
}

const ENTITY_JOINS = `
FROM entities e
Expand Down Expand Up @@ -509,8 +535,22 @@ async function queryEntities(
conditions.push('e.embedding IS NOT NULL');
}

// Organization filter
conditions.push(`e.organization_id = $${addParam(organizationId)}`);
// Organization filter — caller's org always; public-catalog orgs when the
// flag is on (default), so an agent looking up "Apple" finds tenant-local
// and canonical hits in one call. The result row carries the org_id so the
// agent can tell which is which. The same param index is reused by the
// count subqueries in entitySelectColumns(orgParamIdx), which gate
// operational counts (events, connections, watchers) on caller-org rows
// so cross-org public results don't leak other tenants' activity.
const includePublic = args.include_public_catalogs ?? true;
const orgParamIdx = addParam(organizationId);
if (includePublic) {
conditions.push(
`(e.organization_id = $${orgParamIdx} OR EXISTS (SELECT 1 FROM organization o WHERE o.id = e.organization_id AND o.visibility = 'public'))`
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Prevent cross-tenant config leak from public entity search

Expanding the search scope here to all visibility='public' entities means a public entity can become the primary hit, after which formatEntityResult calls fetchConnectionsForEntity (default include_connections=true) and returns c.config without scoping by caller org. In any environment where multiple tenants attach feeds to the same canonical public entity, this allows one tenant’s search call to read other tenants’ connection metadata/config for that entity. Please restrict connection hydration to ctx.organizationId (or skip it for non-caller-org entities) when public results are enabled.

Useful? React with 👍 / 👎.

);
} else {
conditions.push(`e.organization_id = $${orgParamIdx}`);
}

if (args.entity_type) conditions.push(`et.slug = $${addParam(args.entity_type)}`);
if (args.parent_id) conditions.push(`e.parent_id = $${addParam(args.parent_id)}`);
Expand Down Expand Up @@ -559,13 +599,13 @@ async function queryEntities(
}

const rows = await sql.unsafe<EntityQueryRow>(
`SELECT ${ENTITY_SELECT_COLUMNS},
`SELECT ${entitySelectColumns(orgParamIdx)},
${scoreExpr} as match_score,
'${matchReason}' as match_reason,
${vectorSimExpr} as vector_similarity
${ENTITY_JOINS}
WHERE ${whereClause}
ORDER BY match_score DESC
ORDER BY (e.organization_id = $${orgParamIdx}) DESC, match_score DESC
LIMIT ${limit}`,
params
);
Expand All @@ -578,11 +618,16 @@ async function queryEntities(
async function fetchEntityById(entityId: number, _env: Env, organizationId: string) {
const sql = getDb();

// Caller's org or any visibility=public catalog. Lets entity_id lookup find
// canonical entities (HMRC, banks) the agent has discovered via search.
// Operational counts (events, connections, watchers) are gated on
// caller-org so cross-org public hits don't leak other tenants' activity.
const result = await sql.unsafe<EntityQueryRow>(
`SELECT ${ENTITY_SELECT_COLUMNS}
`SELECT ${entitySelectColumns(2)}
${ENTITY_JOINS}
LEFT JOIN organization eo ON eo.id = e.organization_id
WHERE e.id = $1
AND e.organization_id = $2
AND (e.organization_id = $2 OR eo.visibility = 'public')
AND e.deleted_at IS NULL`,
[entityId, organizationId]
);
Expand Down Expand Up @@ -627,16 +672,28 @@ async function formatEntityResult(

const baseUrl = getPublicWebUrl(ctx.requestUrl, ctx.baseUrl);
const primaryEntity = matches[0];
const primaryRow = entityRows[0];
const entityType = primaryEntity.type;
const isRootEntity = !primaryEntity.parent_id;

// Fetch connections if requested (default: true)
// Fetch connections if requested (default: true). Public-catalog entities
// are referenced by many tenants; running fetchConnectionsForEntity on
// them would surface other tenants' private connection metadata
// (display_name, config, feed entity names). Connections are per-tenant
// operational data, never canonical, so skip them entirely for cross-org
// public results.
let connections: ConnectionInfo[] | undefined;
if (args.include_connections ?? true) {
const primaryIsCallerOrg =
String(primaryRow.organization_id) === ctx.organizationId;
if ((args.include_connections ?? true) && primaryIsCallerOrg) {
connections = await fetchConnectionsForEntity(primaryEntity.id);
}

// Fetch children for root entities (no parent)
// Fetch children for root entities (no parent). Children are scoped to
// the primary's own org — preserves the parent-org boundary and stops
// tenant-private "child of HMRC"-style rows from leaking when the primary
// is a cross-org public entity. content_count is zeroed for cross-org
// primaries to match the same invariant the parent's stats follow.
let children: UnifiedSearchResult['children'];
if (isRootEntity) {
const childRows = await getDb()<ChildEntityRow>`
Expand All @@ -645,13 +702,18 @@ async function formatEntityResult(
e.name,
et.slug AS entity_type,
e.metadata::jsonb->>'market' as market,
COALESCE(
(SELECT COUNT(*) FROM current_event_records WHERE e.id = ANY(entity_ids)),
0
) as content_count
CASE WHEN ${primaryIsCallerOrg} THEN
COALESCE(
(SELECT COUNT(*) FROM current_event_records ev
WHERE e.id = ANY(ev.entity_ids)
AND ev.organization_id = e.organization_id),
0
)
ELSE 0 END as content_count
FROM entities e
JOIN entity_types et ON et.id = e.entity_type_id
WHERE e.parent_id = ${primaryEntity.id}
AND e.organization_id = ${primaryRow.organization_id}
ORDER BY e.created_at DESC
`;
children = childRows.map((row) => ({
Expand Down
Loading