/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

import { getAstAndSyntaxErrors as parse } from '../ast_parser';

/**
 * Checks that column identifiers are exposed on the AST as `column` nodes whose
 * `parts` array holds each dot-separated segment with quoting removed.
 */
describe('Column Identifier Expressions', () => {
  /** Partial shape expected for a column node made of the given parts. */
  const column = (...parts: string[]) => ({ type: 'column', parts });

  /** Partial shape expected for a command node carrying the given columns. */
  const commandWith = (...args: Array<ReturnType<typeof column>>) => ({
    type: 'command',
    args,
  });

  it('can parse un-quoted identifiers', () => {
    const { ast } = parse('ROW a, b.c');

    expect(ast).toMatchObject([commandWith(column('a'), column('b', 'c'))]);
  });

  it('can parse quoted identifiers', () => {
    // A doubled backtick inside a quoted part stands for one literal backtick.
    const { ast } = parse('ROW `a`, `b`.c, `d`.`👍`.`123``123`');

    expect(ast).toMatchObject([
      commandWith(column('a'), column('b', 'c'), column('d', '👍', '123`123')),
    ]);
  });

  it('can mix quoted and un-quoted identifiers', () => {
    const { ast } = parse('ROW part1.part2.`part``3️⃣`');

    expect(ast).toMatchObject([commandWith(column('part1', 'part2', 'part`3️⃣'))]);
  });

  it('in KEEP command', () => {
    const { ast } = parse('FROM a | KEEP a.b');

    // The first command (FROM) is matched loosely; only KEEP is inspected.
    expect(ast).toMatchObject([{}, commandWith(column('a', 'b'))]);
  });
});
a/packages/kbn-esql-ast/src/ast_helpers.ts b/packages/kbn-esql-ast/src/ast_helpers.ts index 6ebb3fb9c4490..44f9a2663db17 100644 --- a/packages/kbn-esql-ast/src/ast_helpers.ts +++ b/packages/kbn-esql-ast/src/ast_helpers.ts @@ -11,12 +11,14 @@ */ import { type Token, type ParserRuleContext, type TerminalNode } from 'antlr4'; -import type { - ArithmeticUnaryContext, - DecimalValueContext, - InlineCastContext, - IntegerValueContext, - QualifiedIntegerLiteralContext, +import { + QualifiedNameContext, + type ArithmeticUnaryContext, + type DecimalValueContext, + type InlineCastContext, + type IntegerValueContext, + type QualifiedIntegerLiteralContext, + QualifiedNamePatternContext, } from './antlr/esql_parser'; import { getPosition } from './ast_position_utils'; import { DOUBLE_TICKS_REGEX, SINGLE_BACKTICK, TICKS_REGEX } from './constants'; @@ -39,6 +41,7 @@ import type { FunctionSubtype, ESQLNumericLiteral, } from './types'; +import { parseIdentifier } from './parser/helpers'; export function nonNullable(v: T): v is NonNullable { return v != null; @@ -360,10 +363,13 @@ export function createSource( } export function createColumnStar(ctx: TerminalNode): ESQLColumn { + const text = ctx.getText(); + return { type: 'column', - name: ctx.getText(), - text: ctx.getText(), + name: text, + parts: [text], + text, location: getPosition(ctx.symbol), incomplete: ctx.getText() === '', quoted: false, @@ -371,11 +377,22 @@ export function createColumnStar(ctx: TerminalNode): ESQLColumn { } export function createColumn(ctx: ParserRuleContext): ESQLColumn { + const parts: string[] = []; + if (ctx instanceof QualifiedNamePatternContext) { + parts.push( + ...ctx.identifierPattern_list().map((identifier) => parseIdentifier(identifier.getText())) + ); + } else if (ctx instanceof QualifiedNameContext) { + parts.push(...ctx.identifier_list().map((identifier) => parseIdentifier(identifier.getText()))); + } else { + parts.push(sanitizeIdentifierString(ctx)); + } const text = 
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

/**
 * Tells whether `text` is wrapped in backticks.
 *
 * At least two characters are required: a lone backtick cannot be both the
 * opening and the closing quote, so it is reported as not quoted.
 *
 * @param text Raw identifier text.
 * @returns `true` when the text starts and ends with distinct backticks.
 */
export const isQuotedIdentifier = (text: string): boolean => {
  if (text.length < 2) {
    return false;
  }

  return text.startsWith('`') && text.endsWith('`');
};

/**
 * Converts raw identifier text into its unquoted value.
 *
 * Unquoted text is returned unchanged. Quoted text loses its surrounding
 * backticks and every doubled backtick inside is collapsed into a single one.
 *
 * @param text Raw identifier text, quoted or not.
 * @returns The identifier value without quoting.
 */
export const parseIdentifier = (text: string): string => {
  if (!isQuotedIdentifier(text)) {
    return text;
  }

  return text.slice(1, -1).replace(/``/g, '`');
};

/**
 * Matches text that `formatIdentifier` leaves unquoted: the first character is
 * a letter, `*`, `_` or `@`; the remaining characters are letters, digits, `_`
 * or `*`. Matching is case-insensitive.
 */
export const regexUnquotedIdentifierPattern = /^([a-z\*_\@]{1})[a-z0-9_\*]*$/i;

/**
 * Renders a single identifier part as text, quoting it only when needed.
 *
 * Text matching `regexUnquotedIdentifierPattern` is returned as-is. Anything
 * else is wrapped in backticks, with embedded backticks doubled.
 *
 * @param text Unquoted identifier value.
 * @returns The identifier as text, quoted if required.
 */
export const formatIdentifier = (text: string): string => {
  if (regexUnquotedIdentifierPattern.test(text)) {
    return text;
  }

  return `\`${text.replace(/`/g, '``')}\``;
};

/**
 * Renders a multi-part identifier by formatting each part with
 * `formatIdentifier` and joining the results with dots.
 *
 * @param parts Unquoted identifier parts, in order.
 * @returns The dot-separated identifier text.
 */
export const formatIdentifierParts = (parts: string[]): string =>
  parts.map(formatIdentifier).join('.');
+ + /** + * An identifier can be composed of multiple parts, e.g. part1.part2.`part``3️⃣`. + * This property contains the parsed unquoted parts of the identifier. + * For example: `['part1', 'part2', 'part`3️⃣']`. + */ + parts: string[]; + + /** + * @deprecated + * + * An identifier can be composed of multiple parts, e.g. part1.part2.`part3️⃣` + * + * Each part can be quoted or not quoted independently. A single `quoted` + * property is not enough to represent the identifier. Use `parts` instead. + */ quoted: boolean; }