Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -134,16 +134,22 @@ async function evaluateEsqlQuery({

const docBase = await EsqlDocumentBase.load();

const prompts = docBase.getPrompts();
const languageDescription = `${prompts.syntax}

${prompts.examples}
`;

const usedCommands = await retrieveUsedCommands({
question,
answer,
esqlDescription: docBase.getSystemMessage(),
esqlDescription: languageDescription,
});

const requestedDocumentation = docBase.getDocumentation(usedCommands, {
generateMissingKeywordDoc: false,
});
requestedDocumentation.commands_and_functions = docBase.getSystemMessage();
requestedDocumentation.commands_and_functions = languageDescription;

const evaluation = await evaluationClient.evaluate({
input: `
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import { INLINE_ESQL_QUERY_REGEX } from '../../../../common/tasks/nl_to_esql/con
import type { EsqlDocumentBase } from '../doc_base';
import { requestDocumentationSchema } from './shared';
import type { NlToEsqlTaskEvent } from '../types';
import { generateEsqlPrompt } from './prompts';

const MAX_CALLS = 5;

Expand All @@ -44,14 +45,13 @@ type LlmEsqlTask<TToolOptions extends ToolOptions = ToolOptions> = (
interface GenerateEsqlTaskOptions
extends Pick<ChatCompleteOptions, 'maxRetries' | 'retryConfiguration' | 'functionCalling'> {
connectorId: string;
systemMessage: string;
messages: Message[];
chatCompleteApi: ChatCompleteAPI;
docBase: EsqlDocumentBase;
logger: Pick<Logger, 'debug'>;
metadata?: ChatCompleteMetadata;
system?: string;
maxCallsAllowed?: number;
additionalSystemInstructions?: string;
}

export function generateEsqlTask<TToolOptions extends ToolOptions>(
Expand All @@ -63,15 +63,14 @@ export function generateEsqlTask<TToolOptions extends ToolOptions>(
export function generateEsqlTask({
chatCompleteApi,
connectorId,
systemMessage,
additionalSystemInstructions,
messages,
toolOptions: { tools, toolChoice },
docBase,
functionCalling,
maxRetries,
retryConfiguration,
logger,
system,
metadata,
maxCallsAllowed = MAX_CALLS,
}: GenerateEsqlTaskOptions & {
Expand Down Expand Up @@ -108,32 +107,11 @@ export function generateEsqlTask({
retryConfiguration,
metadata,
stream: true,
system: `${systemMessage}

# Current task

Your current task is to respond to the user's question. If there is a tool
suitable for answering the user's question, use that tool, preferably
with a natural language reply included.

Format any ES|QL query as follows:
\`\`\`esql
<query>
\`\`\`

When generating ES|QL, it is VERY important that you only use commands and functions present in the
requested documentation, and follow the syntax as described in the documentation and its examples.
Assume that ONLY the set of capabilities described in the provided ES|QL documentation is valid, and
do not try to guess parameters or syntax based on other query languages.

If what the user is asking for is not technically achievable with ES|QL's capabilities, just inform
the user. DO NOT invent capabilities not described in the documentation just to provide
a positive answer to the user. E.g. Pagination is not supported by the language, do not try to invent
workarounds based on other languages.

When converting queries from one language to ES|QL, make sure that the functions are available
and documented in ES|QL. E.g., for SPL's LEN, use LENGTH. For IF, use CASE.
${system ? `## Additional instructions\n\n${system}` : ''}`,
system: generateEsqlPrompt({
esqlPrompts: docBase.getPrompts(),
additionalSystemInstructions,
hasTools: !functionLimitReached && Object.keys(tools ?? {}).length > 0,
}),
messages: [
...messages,
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { EsqlPrompts } from '../doc_base/load_data';

/**
 * Builds the system prompt for the documentation-request step.
 *
 * The prompt states the assistant's role, describes the task (requesting
 * documentation from the ES|QL handbook), and then embeds the `syntax` and
 * `examples` prompt fragments. The `instructions` fragment is not used here.
 *
 * @param esqlPrompts - Prompt fragments; only `syntax` and `examples` are read.
 * @returns The assembled system prompt, with sections separated by blank lines.
 */
export const requestDocumentationSystemPrompt = ({ esqlPrompts }: { esqlPrompts: EsqlPrompts }) => {
  const { syntax, examples } = esqlPrompts;

  const role = 'You are an assistant that helps with writing ESQL query for Elasticsearch.';
  const task =
    'Your current task is to examine the previous conversation, and to request documentation\n' +
    'from the ES|QL handbook to help you get the right information needed to generate a query.';
  const docsHeading =
    'Below are the ES|QL syntax and some examples from the official ES|QL documentation.';

  return `${role}\n\n${task}\n\n${docsHeading}\n\n${syntax}\n\n${examples}`;
};

/**
 * Builds the system prompt for the ES|QL generation step.
 *
 * The prompt is assembled from a fixed list of sections separated by blank
 * lines: role, current task, the documentation fragments (`syntax`,
 * `examples`, `instructions`), optional additional instructions, and the
 * query formatting rule. When tools are available, a reminder to use them is
 * placed both after the task statement and at the very end of the prompt.
 *
 * Optional sections that do not apply are left out entirely, so the prompt
 * never contains the literal text "undefined" or empty filler lines.
 *
 * @param esqlPrompts - Prompt fragments embedded under the "Documentation" heading.
 * @param additionalSystemInstructions - Extra instructions, wrapped in an
 *   `<additional_instructions>` element. Omitted when empty or not provided.
 * @param hasTools - Whether to include the tool-usage reminder. Defaults to `false`.
 * @returns The assembled system prompt.
 */
export const generateEsqlPrompt = ({
  esqlPrompts,
  additionalSystemInstructions,
  hasTools = false,
}: {
  esqlPrompts: EsqlPrompts;
  additionalSystemInstructions?: string;
  hasTools?: boolean;
}) => {
  const toolBlock = hasTools
    ? `**IMPORTANT**: If there is a tool suitable for answering the user's question, use that tool,
preferably with a natural language reply included.`
    : undefined;

  // The closing tag must mirror the opening tag so the block is well-formed.
  const additionalBlock = additionalSystemInstructions
    ? `<additional_instructions>\n${additionalSystemInstructions}\n</additional_instructions>`
    : undefined;

  const sections: Array<string | undefined> = [
    `You are an assistant that helps with writing ESQL query for Elasticsearch.
Given a natural language query, you will generate an ESQL query that can be executed against the data source.`,
    '# Current task',
    "Your current task is to respond to the user's question.",
    toolBlock,
    '## Documentation',
    esqlPrompts.syntax,
    esqlPrompts.examples,
    esqlPrompts.instructions,
    additionalBlock,
    'Take your time and think step by step about the natural language query and how to convert it into ESQL.',
    `Format any ES|QL query as follows:
\`\`\`esql
<query>
\`\`\``,
    // Repeated on purpose: the reminder appears both early and as the last thing in the prompt.
    toolBlock,
  ];

  // Interpolating an absent optional section would stringify it as "undefined"; drop it instead.
  return sections.filter((section): section is string => section !== undefined).join('\n\n');
};
Original file line number Diff line number Diff line change
Expand Up @@ -5,38 +5,37 @@
* 2.0.
*/

import { isEmpty } from 'lodash';
import type {
ToolOptions,
Message,
ChatCompleteMetadata,
ChatCompleteOptions,
OutputAPI,
} from '@kbn/inference-common';
import { ToolChoiceType, withoutOutputUpdateEvents } from '@kbn/inference-common';
import { withoutOutputUpdateEvents } from '@kbn/inference-common';
import type { EsqlPrompts } from '../doc_base/load_data';
import { requestDocumentationSystemPrompt } from './prompts';

import { requestDocumentationSchema } from './shared';

export const requestDocumentation = ({
outputApi,
system,
esqlPrompts,
messages,
connectorId,
functionCalling,
maxRetries,
retryConfiguration,
metadata,
toolOptions: { tools, toolChoice },
toolOptions,
}: {
outputApi: OutputAPI;
system: string;
messages: Message[];
esqlPrompts: EsqlPrompts;
connectorId: string;
metadata?: ChatCompleteMetadata;
toolOptions: ToolOptions;
} & Pick<ChatCompleteOptions, 'maxRetries' | 'retryConfiguration' | 'functionCalling'>) => {
const hasTools = !isEmpty(tools) && toolChoice !== ToolChoiceType.none;

return outputApi({
id: 'request_documentation',
connectorId,
Expand All @@ -45,32 +44,17 @@ export const requestDocumentation = ({
maxRetries,
retryConfiguration,
metadata,
system,
system: requestDocumentationSystemPrompt({ esqlPrompts }),
previousMessages: messages,
input: `Based on the previous conversation, request documentation
input: `Now, based on the previous conversation, request documentation
from the ES|QL handbook to help you get the right information
needed to generate a query.

Examples for functions and commands:
- Do you need to group data? Request \`STATS\`.
- Extract data? Request \`DISSECT\` AND \`GROK\`.
- Convert a column based on a set of conditionals? Request \`EVAL\` and \`CASE\`.

${
hasTools
? `### Tools

The following tools will be available to be called in the step after this.

\`\`\`json
${JSON.stringify({
tools,
toolChoice,
})}
\`\`\``
: ''
}
`,
`,
schema: requestDocumentationSchema,
}).pipe(withoutOutputUpdateEvents());
};
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
const aliases: Record<string, string[]> = {
STATS: ['STATS_BY', 'BY', 'STATS...BY', 'STATS ... BY'],
OPERATORS: ['LIKE', 'RLIKE', 'IN'],
LOOKUP_JOIN: ['LOOKUPJOIN'],
LOOKUP_JOIN: ['JOIN', 'LOOKUPJOIN', 'LOOKUP JOIN'],
FROM: ['METADATA'],
};

const getAliasMap = () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,15 @@
*/

import { once } from 'lodash';
import { loadData, type EsqlDocData, type EsqlDocEntry } from './load_data';
import { loadData, type EsqlDocData, type EsqlDocEntry, type EsqlPrompts } from './load_data';
import { tryResolveAlias } from './aliases';
import { getSuggestions } from './suggestions';
import type { GetDocsOptions } from './types';

const loadDataOnce = once(loadData);

const overviewEntries = ['SYNTAX', 'OVERVIEW', 'OPERATORS'];

export class EsqlDocumentBase {
private systemMessage: string;
private prompts: EsqlPrompts;
private docRecords: Record<string, EsqlDocEntry>;

static async load(): Promise<EsqlDocumentBase> {
Expand All @@ -25,20 +23,27 @@ export class EsqlDocumentBase {
}

constructor(rawData: EsqlDocData) {
this.systemMessage = rawData.systemMessage;
this.prompts = rawData.prompts;
this.docRecords = rawData.docs;
}

getSystemMessage() {
return this.systemMessage;
/** Returns the prompt fragments stored on this document base. */
getPrompts(): EsqlPrompts {
return this.prompts;
}

/**
 * Returns the legacy combined message: the syntax fragment, a blank line,
 * the examples fragment, and a trailing newline.
 *
 * @deprecated use individual prompts instead
 */
getSystemMessage(): string {
  const { syntax, examples } = this.prompts;
  return `${syntax}\n\n${examples}\n`;
}

getDocumentation(
rawKeywords: string[],
{
generateMissingKeywordDoc = true,
addSuggestions = true,
addOverview = true,
resolveAliases = true,
}: GetDocsOptions = {}
) {
Expand All @@ -54,10 +59,6 @@ export class EsqlDocumentBase {
keywords.push(...getSuggestions(keywords));
}

if (addOverview) {
keywords.push(...overviewEntries);
}

return [...new Set(keywords)].reduce<Record<string, string>>((results, keyword) => {
if (Object.hasOwn(this.docRecords, keyword)) {
results[keyword] = this.docRecords[keyword].data;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,37 @@ export interface EsqlDocEntry {
data: string;
}

/**
 * Text fragments that are interpolated into the ES|QL system prompts.
 * `loadPrompts` fills each field from the same-named `.txt` file under `../prompts`.
 */
export interface EsqlPrompts {
/** Contents of `syntax.txt`. */
syntax: string;
/** Contents of `instructions.txt`. */
instructions: string;
/** Contents of `examples.txt`. */
examples: string;
}

export interface EsqlDocData {
systemMessage: string;
prompts: EsqlPrompts;
docs: Record<string, EsqlDocEntry>;
}

export const loadData = async (): Promise<EsqlDocData> => {
const [systemMessage, docs] = await Promise.all([loadSystemMessage(), loadEsqlDocs()]);
const [prompts, docs] = await Promise.all([loadPrompts(), loadEsqlDocs()]);
return {
systemMessage,
prompts,
docs,
};
};

const loadSystemMessage = async () => {
return (await readFile(Path.join(__dirname, '../system_message.txt'))).toString('utf-8');
/**
 * Reads one prompt file from the `../prompts` directory (relative to this
 * module) and returns its contents decoded as UTF-8.
 *
 * @param fileName - File name inside the prompts directory, e.g. `syntax.txt`.
 */
const loadPrompt = async (fileName: string) => {
  const promptPath = Path.join(__dirname, `../prompts/${fileName}`);
  const contents = await readFile(promptPath);
  return contents.toString('utf-8');
};

/**
 * Loads the three prompt fragments (`examples.txt`, `instructions.txt`,
 * `syntax.txt`) concurrently and returns them keyed by fragment name.
 */
const loadPrompts = async () => {
  // Start all three reads before awaiting so they run in parallel.
  const pendingExamples = loadPrompt('examples.txt');
  const pendingInstructions = loadPrompt('instructions.txt');
  const pendingSyntax = loadPrompt('syntax.txt');

  const [examples, instructions, syntax] = await Promise.all([
    pendingExamples,
    pendingInstructions,
    pendingSyntax,
  ]);

  return { examples, instructions, syntax };
};

const loadEsqlDocs = async (): Promise<Record<string, EsqlDocEntry>> => {
Expand Down
Loading