Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ import { ASSISTANT_ELSER_INFERENCE_ID } from './field_maps_configuration';
import { BulkOperationError } from '../../lib/data_stream/documents_data_writer';
import { AUDIT_OUTCOME, KnowledgeBaseAuditAction, knowledgeBaseAuditEvent } from './audit_events';
import { findDocuments } from '../find';
import { ensureIntegrationKnowledgeIndexEntry } from '../../ai_assistant_service/integration_knowledge_helper';

/**
* Params for when creating KbDataClient in Request Context Factory. Useful if needing to modify
Expand All @@ -93,6 +94,7 @@ export interface KnowledgeBaseDataClientParams extends AIAssistantDataClientPara
manageGlobalKnowledgeBaseAIAssistant: boolean;
getTrainedModelsProvider: () => ReturnType<TrainedModelsProvider['trainedModelsProvider']>;
elserInferenceId?: string;
telemetry: AnalyticsServiceSetup;
Comment thread
jen-huang marked this conversation as resolved.
}
export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
constructor(public readonly options: KnowledgeBaseDataClientParams) {
Expand Down Expand Up @@ -293,6 +295,19 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
}
}

// Ensure integration knowledge index entry exists first (synchronous)
try {
await ensureIntegrationKnowledgeIndexEntry(
this,
this.options.logger.get('integrationKnowledge'),
this.options.telemetry
);
} catch (error) {
this.options.logger.error(
`Failed to ensure integration knowledge index entry: ${error.message}`
);
}

if (!ignoreSecurityLabs) {
this.options.logger.debug(`Checking if Knowledge Base docs have been loaded...`);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import type {
ElasticsearchClient,
KibanaRequest,
SavedObjectsClientContract,
AnalyticsServiceSetup,
} from '@kbn/core/server';
import type { TaskManagerSetupContract } from '@kbn/task-manager-plugin/server';
import type { MlPluginSetup } from '@kbn/ml-plugin/server';
Expand Down Expand Up @@ -95,6 +96,7 @@ export interface CreateAIAssistantClientParams {
spaceId: string;
currentUser: AuthenticatedUser | null;
licensing: Promise<LicensingApiRequestHandlerContext>;
telemetry: AnalyticsServiceSetup;
}

export type CreateDataStream = (params: {
Expand Down Expand Up @@ -609,6 +611,7 @@ export class AIAssistantService {
spaceId: opts.spaceId,
manageGlobalKnowledgeBaseAIAssistant: opts.manageGlobalKnowledgeBaseAIAssistant ?? false,
getTrainedModelsProvider: opts.getTrainedModelsProvider,
telemetry: opts.telemetry,
});
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { AnalyticsServiceSetup, Logger } from '@kbn/core/server';
import { IndexEntryType } from '@kbn/elastic-assistant-common';
import { AIAssistantKnowledgeBaseDataClient } from '../ai_assistant_data_clients/knowledge_base';

const INTEGRATION_KNOWLEDGE_INDEX_NAME = '.integration_knowledge';

/**
 * Checks whether a Knowledge Base index entry that points at the integration
 * knowledge index already exists.
 *
 * Runs a single-result `findDocuments` query filtered on
 * `type:index AND index:<INTEGRATION_KNOWLEDGE_INDEX_NAME>` and reports whether
 * the returned `total` is greater than zero.
 *
 * This is a best-effort check: anything thrown by the lookup is logged at debug
 * level and reported as `false` instead of being rethrown, so a failed lookup is
 * indistinguishable from "entry not found" for the caller.
 *
 * @param params.kbDataClient Knowledge Base data client used to run the lookup
 * @param params.logger Logger that receives the debug output
 * @returns `true` when at least one matching entry is found; `false` when none
 *   is found or the lookup throws
 */
export const checkIntegrationKnowledgeIndexEntryExists = async ({
  kbDataClient,
  logger,
}: {
  kbDataClient: AIAssistantKnowledgeBaseDataClient;
  logger: Logger;
}): Promise<boolean> => {
  try {
    // Only the hit count matters, so request the smallest possible page.
    const results = await kbDataClient.findDocuments({
      page: 1,
      perPage: 1,
      filter: `type:index AND index:${INTEGRATION_KNOWLEDGE_INDEX_NAME}`,
    });

    const exists = results.total > 0;
    logger.debug(`Integration knowledge index entry exists: ${exists}`);
    return exists;
  } catch (error) {
    // A thrown value is not guaranteed to be an Error instance; narrow before
    // reading `.message` so the log never ends up containing "undefined".
    const message = error instanceof Error ? error.message : String(error);
    logger.debug(`Error checking integration knowledge index entry: ${message}`);
    return false;
  }
};

/**
* Ensures the integration knowledge index entry exists during Knowledge Base setup.
* Similar to loadSecurityLabs() but for Index Entries rather than Document Entries.
*/
export const ensureIntegrationKnowledgeIndexEntry = async (
kbDataClient: AIAssistantKnowledgeBaseDataClient,
logger: Logger,
telemetry: AnalyticsServiceSetup
): Promise<boolean> => {
try {
logger.debug('Checking if integration knowledge index entry exists...');

const entryExists = await checkIntegrationKnowledgeIndexEntryExists({
kbDataClient,
logger,
});

if (!entryExists) {
logger.debug('Creating integration knowledge index entry...');

const entry = await kbDataClient.createKnowledgeBaseEntry({

@jen-huang jen-huang Aug 14, 2025

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIU, this will be executed as the current user at the time of the request context factory creation. my concerns:

  1. I hoped that I could use a system/internal user instead, but that doesn't appear to be a concept with these clients. It feels right to use a system user because we expect this entry to always exist as long as the AI Assistant is available.

    should a way to use a system user be introduced or is scoping to current user not a problem?

  2. or perhaps this setup lives in the wrong place? if this were Fleet I would expect this code to execute during plugin start/setup, but kbDataClient is not initialized there

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is actually executed with esClient.asInternalUser (usage, plugin init), however this path still verifies that the user is authenticated and has global privileges, which might not be the case, and so this would error out. Looks like you can bypass this though and go directly to the underlying createKnowledgeBaseEntry() implementation and pass a dummy user object.

I think this should work, as the IndexEntry is marked as global and shouldn't be constrained by any user filters, but I'm hesitant as we don't do this anywhere else -- the security labs content still appears to be installed as the current user as well, so that is something that needs to be fixed here too...

WRT 2., we actually do all our index setup during plugin start/setup via the AIAssistantService, so we could technically do this over there once all the assets have been installed. The reason we don't do this for the Security Labs content is that they're DocumentEntries and so have semantic_text field content, so we need to ensure ELSER is deployed and ready beforehand (which happens as part of KB setup).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm actually just about to pick up a major refactor of the kbDataClient to add multilingual support for 9.2 (by adding support for arbitrary inferenceIds), so I can try to address some of these ergonomics as part of that. Please let me know if there's anything else you're tracking here that might be helpful.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> Looks like you can bypass this though and go directly to the underlying createKnowledgeBaseEntry() implementation and pass a dummy user object.
>
> I think this should work, as the IndexEntry is marked as global and shouldn't be constrained by any user filters, but I'm hesitant as we don't do this anywhere else

I tried this approach but the pattern of using a dummy user felt quite odd, so I reverted it and left the implementation as is.

> WRT 2., we actually do all our index setup during plugin start/setup via the AIAssistantService, so we could technically do this over there once all the assets have been installed. The reason we don't do this for the Security Labs content is that they're DocumentEntries and so have semantic_text field content, so we need to ensure ELSER is deployed and ready beforehand (which happens as part of KB setup).

what do you mean once all the assets have been installed? is it still relevant after the ES work in elastic/elasticsearch#132506 and for elastic/elasticsearch#133171?

I moved the call to ensureIntegrationKnowledgeIndexEntry to be executed first in setupKnowledgeBase so that it's always executed regardless of the ML nodes, ELSER readiness, etc. do you see any issues with that?

on the Fleet side, we don't do any additional checks before we install a package's KB contents (push documents to .integration_knowledge index)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> what do you mean once all the assets have been installed? is it still relevant after the ES work in elastic/elasticsearch#132506 and for elastic/elasticsearch#133171?

By assets I just meant all the index/component templates and such. I was just commenting that we could create the initial IndexEntry record after that is complete. This is independent of those Elasticsearch PRs.

> I moved the call to ensureIntegrationKnowledgeIndexEntry to be executed first in setupKnowledgeBase so that it's always executed regardless of the ML nodes, ELSER readiness, etc. do you see any issues with that?

This is fine. As far as I understand (need to test/confirm though), document creates with no value for the semantic_text will not result in an inference call and should succeed. Which is not the case for other forms of updates (docs).


We actually just got confirmation this week from Product that we can tie assistant features to inference API availability, so that means the KB setup process is going to go away. We can now always assume we'll have access to an inference endpoint (and so can ingest documents containing semantic_text at any time). That said, based on @sorenlouv's post over here (elastic/elasticsearch#133171 (comment)), we may want to re-work our approach here since this must be queried with the internal esClient, which isn't currently being passed through getStructuredToolForIndexEntry() (so we'd need a special case for this specific IndexEntry).

Let's chat when you have a moment and we can see what Søren has to say on that issue as well.

knowledgeBaseEntry: {
type: IndexEntryType.value,
index: INTEGRATION_KNOWLEDGE_INDEX_NAME,
field: 'content',
name: 'Integration Knowledge',
description:
'Integration knowledge base containing semantic information about integrations installed via Fleet',
queryDescription:
'Use this tool to search for information about integrations, integration configurations, troubleshooting guides, and best practices',
Comment thread
jen-huang marked this conversation as resolved.
Outdated
global: true,
users: [],
},
telemetry,
});

if (entry) {
logger.info('Integration knowledge index entry created successfully');
return true;
} else {
logger.warn('Failed to create integration knowledge index entry');
return false;
}
} else {
logger.debug('Integration knowledge index entry already exists');
return true;
}
} catch (error) {
logger.error(`Error ensuring integration knowledge index entry: ${error.message}`);
return false;
}
};
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ export class RequestContextFactory implements IRequestContextFactory {
spaceId: getSpaceId(),
logger: this.logger,
licensing: context.licensing,
telemetry: core.analytics,
currentUser,
elserInferenceId: params?.elserInferenceId,
manageGlobalKnowledgeBaseAIAssistant:
Expand All @@ -159,6 +160,7 @@ export class RequestContextFactory implements IRequestContextFactory {
logger: this.logger,
currentUser,
adhocAttackDiscoveryDataClient: this.adhocAttackDiscoveryDataClient,
telemetry: core.analytics,
});
}),

Expand All @@ -177,6 +179,7 @@ export class RequestContextFactory implements IRequestContextFactory {
licensing: context.licensing,
logger: this.logger,
currentUser,
telemetry: core.analytics,
});
}),

Expand All @@ -187,6 +190,7 @@ export class RequestContextFactory implements IRequestContextFactory {
licensing: context.licensing,
logger: this.logger,
currentUser,
telemetry: core.analytics,
});
}),

Expand All @@ -197,6 +201,7 @@ export class RequestContextFactory implements IRequestContextFactory {
licensing: context.licensing,
logger: this.logger,
currentUser,
telemetry: core.analytics,
});
}),

Expand All @@ -207,6 +212,7 @@ export class RequestContextFactory implements IRequestContextFactory {
licensing: context.licensing,
logger: this.logger,
currentUser,
telemetry: core.analytics,
});
}),

Expand All @@ -218,6 +224,7 @@ export class RequestContextFactory implements IRequestContextFactory {
logger: this.logger,
currentUser,
contentReferencesEnabled: params?.contentReferencesEnabled,
telemetry: core.analytics,
});
}),
};
Expand Down