diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/analytics/recall_ranking.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/analytics/recall_ranking.ts index b3cd00998ef1e..e75950af7f158 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/analytics/recall_ranking.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/analytics/recall_ranking.ts @@ -8,7 +8,7 @@ import { RootSchema, EventTypeOpts } from '@kbn/core/server'; interface ScoredDocument { - elserScore: number; + esScore: number; llmScore: number; } @@ -21,10 +21,10 @@ const schema: RootSchema = { type: 'array', items: { properties: { - elserScore: { + esScore: { type: 'float', _meta: { - description: 'The score produced by ELSER text_expansion', + description: 'The score produced by Elasticsearch', }, }, llmScore: { diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/functions/context.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/functions/context.ts index 80770b297f08e..4ccdc495435a9 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/functions/context.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/functions/context.ts @@ -65,7 +65,7 @@ export function registerContextFunction({ const userPrompt = userMessage?.message.content!; const userMessageFunctionName = userMessage?.message.name; - const { scores, relevantDocuments, suggestions } = await recallAndScore({ + const { llmScores, relevantDocuments, suggestions } = await recallAndScore({ recall: client.recall, chat, logger: resources.logger, @@ -80,7 +80,7 @@ export function registerContextFunction({ return { content: { ...content, learnings: relevantDocuments as unknown as Serializable }, data: { - scores, + llmScores, suggestions, }, }; diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts index 09b30c9e0f4c7..bb5a5095e30a5 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts @@ -217,12 +217,12 @@ const chatRecallRoute = createObservabilityAIAssistantServerRoute({ signal, }) ).pipe( - map(({ scores, suggestions, relevantDocuments }) => { + map(({ llmScores, suggestions, relevantDocuments }) => { return createFunctionResponseMessage({ name: 'context', data: { suggestions, - scores, + llmScores, }, content: { relevantDocuments, diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/knowledge_base_service/index.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/knowledge_base_service/index.ts index 2fbed5d394a40..c886b5187ab95 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/knowledge_base_service/index.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/knowledge_base_service/index.ts @@ -48,7 +48,7 @@ export interface RecalledEntry { id: string; title?: string; text: string; - score: number | null; + esScore: number | null; is_correction?: boolean; labels?: Record; } @@ -129,7 +129,7 @@ export class KnowledgeBaseService { is_correction: hit._source?.is_correction, labels: hit._source?.labels, title: hit._source?.title ?? hit._source?.doc_id, // use `doc_id` as fallback title for backwards compatibility - score: hit._score!, + esScore: hit._score!, id: hit._id!, })); } @@ -193,7 +193,7 @@ export class KnowledgeBaseService { const sortedEntries = orderBy( documentsFromKb.concat(documentsFromConnectors), - 'score', + 'esScore', 'desc' ).slice(0, limit.size ?? 20); diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts index fd6d8a0d698ad..afe112eb52322 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts @@ -51,7 +51,11 @@ export async function recallFromSearchConnectors({ }), ]); - return orderBy([...semanticTextConnectors, ...legacyConnectors], (entry) => entry.score, 'desc'); + return orderBy( + [...semanticTextConnectors, ...legacyConnectors], + (entry) => entry.esScore, + 'desc' + ); } async function recallFromSemanticTextConnectors({ @@ -108,7 +112,7 @@ async function recallFromSemanticTextConnectors({ const results = response.hits.hits.map((hit) => ({ text: JSON.stringify(hit._source), - score: hit._score!, + esScore: hit._score!, is_correction: false, id: hit._id!, })); @@ -194,7 +198,7 @@ async function recallFromLegacyConnectors({ const results = response.hits.hits.map((hit) => ({ text: JSON.stringify(hit._source), - score: hit._score!, + esScore: hit._score!, is_correction: false, id: hit._id!, })); diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/parse_suggestion_scores.test.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/parse_suggestion_scores.test.ts index abeeda3c37657..c5fb1bdfa584d 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/parse_suggestion_scores.test.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/parse_suggestion_scores.test.ts @@ -20,15 +20,15 @@ describe('parseSuggestionScores', () => { ).toEqual([ { id: 'my-id', - score: 1, + llmScore: 1, }, { id: 'my-other-id', - score: 7, + llmScore: 7, }, { id: 'my-another-id', - score: 10, + llmScore: 10, }, ]); }); @@ -37,15 +37,15 @@ describe('parseSuggestionScores', () => { expect(parseSuggestionScores(`idone,1;idtwo,7;idthree,10`)).toEqual([ { id: 'idone', - score: 1, + llmScore: 1, }, { id: 'idtwo', - score: 7, + llmScore: 7, }, { id: 'idthree', - score: 10, + llmScore: 10, }, ]); }); @@ -54,15 +54,15 @@ describe('parseSuggestionScores', () => { expect(parseSuggestionScores(`a,1 b,7 c,10`)).toEqual([ { id: 'a', - score: 1, + llmScore: 1, }, { id: 'b', - score: 7, + llmScore: 7, }, { id: 'c', - score: 10, + llmScore: 10, }, ]); }); diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/parse_suggestion_scores.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/parse_suggestion_scores.ts index 464504bed85a8..51b79a181e5ed 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/parse_suggestion_scores.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/parse_suggestion_scores.ts @@ -7,7 +7,7 @@ export function parseSuggestionScores(scoresAsString: string) { // make sure that spaces, semi-colons etc work as separators as well - const scores = scoresAsString + const llmScores = scoresAsString .replace(/[^0-9a-zA-Z\-_,]/g, ' ') .trim() .split(/\s+/) @@ -16,9 +16,9 @@ export function parseSuggestionScores(scoresAsString: string) { return { id, - score: parseInt(score, 10), + llmScore: parseInt(score, 10), }; }); - return scores; + return llmScores; } diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/recall_and_score.test.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/recall_and_score.test.ts index c980279b46be2..7414d6a9c1695 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/recall_and_score.test.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/recall_and_score.test.ts @@ -91,7 +91,7 @@ describe('recallAndScore', () => { describe('when no documents are recalled', () => { let result: { relevantDocuments?: RecalledSuggestion[]; - scores?: Array<{ id: string; score: number }>; + llmScores?: Array<{ id: string; llmScore: number }>; suggestions: RecalledSuggestion[]; }; @@ -111,7 +111,7 @@ describe('recallAndScore', () => { }); it('returns empty suggestions', async () => { - expect(result).toEqual({ relevantDocuments: [], scores: [], suggestions: [] }); + expect(result).toEqual({ relevantDocuments: [], llmScores: [], suggestions: [] }); }); it('invokes recall with user prompt and screen context', async () => { @@ -129,7 +129,7 @@ describe('recallAndScore', () => { }); it('handles errors when scoring fails', async () => { - mockRecall.mockResolvedValue([{ id: 'doc1', text: 'Hello world', score: 0.5 }]); + mockRecall.mockResolvedValue([{ id: 'doc1', text: 'Hello world', esScore: 0.5 }]); (scoreSuggestions as jest.Mock).mockRejectedValue(new Error('Scoring failed')); const result = await recallAndScore({ @@ -152,10 +152,10 @@ describe('recallAndScore', () => { }); it('calls scoreSuggestions with correct arguments', async () => { - const recalledDocs = [{ id: 'doc1', text: 'Hello world', score: 0.8 }]; + const recalledDocs = [{ id: 'doc1', text: 'Hello world', esScore: 0.8 }]; mockRecall.mockResolvedValue(recalledDocs); (scoreSuggestions as jest.Mock).mockResolvedValue({ - scores: [{ id: 'doc1', score: 7 }], + llmScores: [{ id: 'doc1', llmScore: 7 }], relevantDocuments: recalledDocs, }); @@ -184,10 +184,10 @@ describe('recallAndScore', () => { it('handles the normal conversation flow correctly', async () => { mockRecall.mockResolvedValue([ - { id: 'fav_color', text: 'My favourite color is blue.', score: 0.9 }, + { id: 'fav_color', text: 'My favourite color is blue.', esScore: 0.9 }, ]); (scoreSuggestions as jest.Mock).mockResolvedValue({ - scores: [{ id: 'fav_color', score: 7 }], + llmScores: [{ id: 'fav_color', llmScore: 7 }], relevantDocuments: [{ id: 'fav_color', text: 'My favourite color is blue.' }], }); @@ -211,10 +211,10 @@ describe('recallAndScore', () => { it('handles contextual insights conversation flow correctly', async () => { mockRecall.mockResolvedValue([ - { id: 'alert_cause', text: 'The alert was triggered due to high CPU usage.', score: 0.85 }, + { id: 'alert_cause', text: 'The alert was triggered due to high CPU usage.', esScore: 0.85 }, ]); (scoreSuggestions as jest.Mock).mockResolvedValue({ - scores: [{ id: 'alert_cause', score: 6 }], + llmScores: [{ id: 'alert_cause', llmScore: 6 }], relevantDocuments: [ { id: 'alert_cause', text: 'The alert was triggered due to high CPU usage.' }, ], @@ -239,10 +239,10 @@ describe('recallAndScore', () => { }); it('reports analytics with the correct structure', async () => { - const recalledDocs = [{ id: 'doc1', text: 'Hello world', score: 0.8 }]; + const recalledDocs = [{ id: 'doc1', text: 'Hello world', esScore: 0.8 }]; mockRecall.mockResolvedValue(recalledDocs); (scoreSuggestions as jest.Mock).mockResolvedValue({ - scores: [{ id: 'doc1', score: 7 }], + llmScores: [{ id: 'doc1', llmScore: 7 }], relevantDocuments: recalledDocs, }); @@ -259,7 +259,7 @@ describe('recallAndScore', () => { expect(mockAnalytics.reportEvent).toHaveBeenCalledWith( recallRankingEventType, - expect.objectContaining({ scoredDocuments: [{ elserScore: 0.8, llmScore: 7 }] }) + expect.objectContaining({ scoredDocuments: [{ esScore: 0.8, llmScore: 7 }] }) ); }); }); diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/recall_and_score.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/recall_and_score.ts index 056e858b4e439..dd42b2e7045ce 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/recall_and_score.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/recall_and_score.ts @@ -14,7 +14,7 @@ import type { FunctionCallChatFunction } from '../../service/types'; import { RecallRanking, recallRankingEventType } from '../../analytics/recall_ranking'; import { RecalledEntry } from '../../service/knowledge_base_service'; -export type RecalledSuggestion = Pick; +export type RecalledSuggestion = Pick; export async function recallAndScore({ recall, @@ -38,7 +38,7 @@ export async function recallAndScore({ signal: AbortSignal; }): Promise<{ relevantDocuments?: RecalledSuggestion[]; - scores?: Array<{ id: string; score: number }>; + llmScores?: Array<{ id: string; llmScore: number }>; suggestions: RecalledSuggestion[]; }> { const queries = [ @@ -47,19 +47,19 @@ export async function recallAndScore({ ].filter((query) => query.text.trim()); const suggestions: RecalledSuggestion[] = (await recall({ queries })).map( - ({ id, text, score }) => ({ id, text, score }) + ({ id, text, esScore }) => ({ id, text, esScore }) ); if (!suggestions.length) { return { relevantDocuments: [], - scores: [], + llmScores: [], suggestions: [], }; } try { - const { scores, relevantDocuments } = await scoreSuggestions({ + const { llmScores, relevantDocuments } = await scoreSuggestions({ suggestions, logger, messages, @@ -72,15 +72,15 @@ export async function recallAndScore({ analytics.reportEvent(recallRankingEventType, { scoredDocuments: suggestions.map((suggestion) => { - const llmScore = scores.find((score) => score.id === suggestion.id); + const llmScore = llmScores.find((score) => score.id === suggestion.id); return { - elserScore: suggestion.score ?? -1, - llmScore: llmScore ? llmScore.score : -1, + esScore: suggestion.esScore ?? -1, + llmScore: llmScore ? llmScore.llmScore : -1, }; }), }); - return { scores, relevantDocuments, suggestions }; + return { llmScores, relevantDocuments, suggestions }; } catch (error) { logger.error(`Error scoring documents: ${error.message}`, { error }); return { diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/score_suggestions.test.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/score_suggestions.test.ts index 6be5e71e4f5cf..01c510e4fad0d 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/score_suggestions.test.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/score_suggestions.test.ts @@ -15,9 +15,9 @@ import { ChatEvent } from '../../../common/conversation_complete'; import { contextualInsightsMessages, normalConversationMessages } from './recall_and_score.test'; const suggestions: RecalledSuggestion[] = [ - { id: 'doc1', text: 'Relevant document 1', score: 0.9 }, - { id: 'doc2', text: 'Relevant document 2', score: 0.8 }, - { id: 'doc3', text: 'Less relevant document 3', score: 0.3 }, + { id: 'doc1', text: 'Relevant document 1', esScore: 0.9 }, + { id: 'doc2', text: 'Relevant document 2', esScore: 0.8 }, + { id: 'doc3', text: 'Less relevant document 3', esScore: 0.3 }, ]; const userPrompt = 'What is my favourite color?'; @@ -52,15 +52,15 @@ describe('scoreSuggestions', () => { logger: mockLogger, }); - expect(result.scores).toEqual([ - { id: 'doc1', score: 7 }, - { id: 'doc2', score: 5 }, - { id: 'doc3', score: 3 }, + expect(result.llmScores).toEqual([ + { id: 'doc1', llmScore: 7 }, + { id: 'doc2', llmScore: 5 }, + { id: 'doc3', llmScore: 3 }, ]); expect(result.relevantDocuments).toEqual([ - { id: 'doc1', text: 'Relevant document 1', score: 0.9 }, - { id: 'doc2', text: 'Relevant document 2', score: 0.8 }, + { id: 'doc1', text: 'Relevant document 1', esScore: 0.9 }, + { id: 'doc2', text: 'Relevant document 2', esScore: 0.8 }, ]); }); @@ -117,7 +117,7 @@ describe('scoreSuggestions', () => { }); expect(result.relevantDocuments).toEqual([ - { id: 'doc1', text: 'Relevant document 1', score: 0.9 }, + { id: 'doc1', text: 'Relevant document 1', esScore: 0.9 }, ]); }); @@ -159,10 +159,10 @@ describe('scoreSuggestions', () => { logger: mockLogger, }); - expect(result.scores).toEqual([ - { id: 'doc1', score: 7 }, - { id: 'doc2', score: 5 }, - { id: 'doc3', score: 3 }, + expect(result.llmScores).toEqual([ + { id: 'doc1', llmScore: 7 }, + { id: 'doc2', llmScore: 5 }, + { id: 'doc3', llmScore: 3 }, ]); }); }); diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/score_suggestions.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/score_suggestions.ts index cfacc37469268..f7975d0325048 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/score_suggestions.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/utils/recall/score_suggestions.ts @@ -49,7 +49,7 @@ export async function scoreSuggestions({ logger: Logger; }): Promise<{ relevantDocuments: RecalledSuggestion[]; - scores: Array<{ id: string; score: number }>; + llmScores: Array<{ id: string; llmScore: number }>; }> { const shortIdTable = new ShortIdTable(); @@ -72,7 +72,7 @@ export async function scoreSuggestions({ Documents: ${JSON.stringify( suggestions.map((suggestion) => ({ - ...omit(suggestion, 'score'), // Omit score to not bias the LLM + ...omit(suggestion, 'esScore'), // Omit ES score to not bias the LLM id: shortIdTable.take(suggestion.id), // Shorten id to save tokens })), null, @@ -126,21 +126,21 @@ export async function scoreSuggestions({ scoreFunctionRequest.message.function_call.arguments ); - const scores = parseSuggestionScores(scoresAsString) + const llmScores = parseSuggestionScores(scoresAsString) // Restore original IDs (added fallback to id for testing purposes) - .map(({ id, score }) => ({ id: shortIdTable.lookup(id) || id, score })); + .map(({ id, llmScore }) => ({ id: shortIdTable.lookup(id) || id, llmScore })); - if (scores.length === 0) { + if (llmScores.length === 0) { // seemingly invalid or no scores, return all - return { relevantDocuments: suggestions, scores: [] }; + return { relevantDocuments: suggestions, llmScores: [] }; } const suggestionIds = suggestions.map((document) => document.id); // get top 5 documents ids with scores > 4 - const relevantDocumentIds = scores - .filter(({ score }) => score > 4) - .sort((a, b) => b.score - a.score) + const relevantDocumentIds = llmScores + .filter(({ llmScore }) => llmScore > 4) + .sort((a, b) => b.llmScore - a.llmScore) .slice(0, 5) .filter(({ id }) => suggestionIds.includes(id ?? '')) // Remove hallucinated documents .map(({ id }) => id); @@ -153,6 +153,6 @@ export async function scoreSuggestions({ return { relevantDocuments, - scores: scores.map((score) => ({ id: score.id, score: score.score })), + llmScores: llmScores.map((score) => ({ id: score.id, llmScore: score.llmScore })), }; } diff --git a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/functions/context.spec.ts b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/functions/context.spec.ts index 1803ef86cf98b..6f7ec963f267d 100644 --- a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/functions/context.spec.ts +++ b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/functions/context.spec.ts @@ -239,7 +239,7 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon parsedContextResponseData.suggestions.forEach((suggestion: RecalledSuggestion) => { expect(suggestion).to.have.property('id'); expect(suggestion).to.have.property('text'); - expect(suggestion).to.have.property('score'); + expect(suggestion).to.have.property('esScore'); }); const suggestionTexts = parsedContextResponseData.suggestions diff --git a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/functions/recall.spec.ts b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/functions/recall.spec.ts index 10815d250e84a..f064dde938ff6 100644 --- a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/functions/recall.spec.ts +++ b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/functions/recall.spec.ts @@ -90,7 +90,7 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon describe('GET /internal/observability_ai_assistant/functions/recall', () => { it('produces unique scores for each doc', async () => { const entries = await recall('What happened during the database outage?'); - const uniqueScores = uniq(entries.map(({ score }) => score)); + const uniqueScores = uniq(entries.map(({ esScore }) => esScore)); expect(uniqueScores.length).to.be.greaterThan(1); expect(uniqueScores.length).to.be(8); }); @@ -104,7 +104,7 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon it('returns entries in a consistent order', async () => { const entries = await recall('whales'); - expect(entries.map(({ id, score }) => `${formatScore(score!)} - ${id}`)).to.eql([ + expect(entries.map(({ id, esScore }) => `${formatScore(esScore!)} - ${id}`)).to.eql([ 'high - animal_whale_migration_patterns', 'low - animal_elephants_social_structure', 'low - technical_api_gateway_timeouts', @@ -118,12 +118,12 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon it('returns the "Cheetah" entry from search connectors as the top result', async () => { const entries = await recall('Cheetah'); - const { text, score } = first(entries)!; + const { text, esScore } = first(entries)!; // search connector entries have their entire doc stringified in `text` field const parsedDoc = JSON.parse(text) as { title: string; text: string }; expect(parsedDoc.title).to.eql('The Life of a Cheetah'); - expect(score).to.greaterThan(0.1); + expect(esScore).to.greaterThan(0.1); }); it('returns different result order for different queries', async () => {