diff --git a/muted-tests.yml b/muted-tests.yml index db3b6e627426f..a7379121da61e 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -378,18 +378,12 @@ tests: - class: org.elasticsearch.reindex.management.ReindexManagementClientYamlTestSuiteIT method: test {yaml=reindex/30_cancel_reindex/Cancel running reindex returns response and GET confirms completed} issue: https://github.com/elastic/elasticsearch/issues/142079 -- class: org.elasticsearch.xpack.esql.qa.multi_node.GenerativeIT - method: test - issue: https://github.com/elastic/elasticsearch/issues/143023 - class: org.elasticsearch.xpack.sql.qa.security.CliApiKeyIT method: testCliConnectionWithApiKey issue: https://github.com/elastic/elasticsearch/issues/143125 - class: org.elasticsearch.packaging.test.DebMetadataTests method: test05CheckLintian issue: https://github.com/elastic/elasticsearch/issues/142819 -- class: org.elasticsearch.xpack.esql.qa.single_node.GenerativeIT - method: test - issue: https://github.com/elastic/elasticsearch/issues/143023 - class: org.elasticsearch.xpack.security.apikey.ApiKeyWorkflowsRestrictionRestIT method: testWorkflowsRestrictionAllowsAccess issue: https://github.com/elastic/elasticsearch/issues/143130 diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java index c70caf337501d..e2eeb8108667a 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java @@ -19,6 +19,7 @@ import org.elasticsearch.xpack.esql.generator.QueryExecuted; import org.elasticsearch.xpack.esql.generator.QueryExecutor; import org.elasticsearch.xpack.esql.generator.command.CommandGenerator; +import 
org.elasticsearch.xpack.esql.generator.command.source.FromGenerator; import org.elasticsearch.xpack.esql.qa.rest.ProfileLogger; import org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase; import org.junit.AfterClass; @@ -44,6 +45,7 @@ import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.COLUMN_TYPE; import static org.elasticsearch.xpack.esql.generator.command.pipe.KeepGenerator.UNMAPPED_FIELD_NAMES; import static org.elasticsearch.xpack.esql.generator.command.source.FromGenerator.SET_UNMAPPED_FIELDS_PREFIX; +import static org.elasticsearch.xpack.esql.generator.command.source.FromGenerator.isFromSource; public abstract class GenerativeRestTest extends ESRestTestCase implements QueryExecutor { @@ -83,8 +85,20 @@ public abstract class GenerativeRestTest extends ESRestTestCase implements Query "Expected to replace a single StubRelation in the plan, but none found", // https://github.com/elastic/elasticsearch/issues/142219 "blocks is empty", // https://github.com/elastic/elasticsearch/issues/142473 "Overflow to represent absolute value of .*.MIN_VALUE", // https://github.com/elastic/elasticsearch/issues/142642 - "illegal query_string option \\[boost\\]", // https://github.com/elastic/elasticsearch/issues/142758 "found value \\[.*\\] type \\[unsupported\\]", // https://github.com/elastic/elasticsearch/issues/142761 + "illegal query_string option \\[boost\\]", // https://github.com/elastic/elasticsearch/issues/142758 + "change point value \\[.*\\] must be numeric", // https://github.com/elastic/elasticsearch/issues/142858 + // https://github.com/elastic/elasticsearch/issues/142860 + "(Grok|Dissect) only supports KEYWORD or TEXT values, found expression \\[.*\\] type \\[NULL\\]", + // https://github.com/elastic/elasticsearch/issues/142543 + "Column \\[.*\\] has conflicting data types in FORK branches: \\[NULL\\] and \\[.*\\]", + "Column \\[.*\\] has conflicting data types in FORK branches: \\[.*\\] and \\[NULL\\]", + "illegal match option 
\\[zero_terms_query\\]", // https://github.com/elastic/elasticsearch/issues/143070 + "Field \\[.*\\] of type \\[.*\\] does not support match.* queries", + "Input for URI_PARTS must be of type \\[string\\] but is \\[null\\]", // https://github.com/elastic/elasticsearch/issues/143145 + "JOIN left field \\[.*\\] of type \\[NULL\\] is incompatible with right", // https://github.com/elastic/elasticsearch/issues/141827 + // https://github.com/elastic/elasticsearch/issues/141827 + "JOIN left field \\[.*\\] of type \\[.*\\] is incompatible with right field \\[.*\\] of type \\[NULL\\]", // Awaiting fixes for correctness "Expecting at most \\[.*\\] columns, got \\[.*\\]", // https://github.com/elastic/elasticsearch/issues/129561 @@ -102,12 +116,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase implements Query "expected named expression for grouping; got ", "Time-series aggregations require direct use of @timestamp which was not found. If @timestamp was renamed in EVAL, " + "use the original @timestamp field instead.", // https://github.com/elastic/elasticsearch/issues/140607 - "change point value \\[.*\\] must be numeric", // https://github.com/elastic/elasticsearch/issues/142858 - // https://github.com/elastic/elasticsearch/issues/142860 - "(Grok|Dissect) only supports KEYWORD or TEXT values, found expression \\[.*\\] type \\[NULL\\]", - // https://github.com/elastic/elasticsearch/issues/142543 - "Column \\[.*\\] has conflicting data types in FORK branches: \\[NULL\\] and \\[.*\\]", - "Column \\[.*\\] has conflicting data types in FORK branches: \\[.*\\] and \\[NULL\\]", // Ts-command errors awaiting fixes "Output has changed from \\[.*\\] to \\[.*\\]" // https://github.com/elastic/elasticsearch/issues/134794 @@ -118,6 +126,10 @@ public abstract class GenerativeRestTest extends ESRestTestCase implements Query .map(x -> Pattern.compile(x, Pattern.DOTALL)) .collect(Collectors.toSet()); + private static final Pattern FULL_TEXT_AFTER_SORT_PATTERN = Pattern.compile( 
+ ".*\\[(KQL|QSTR)] function cannot be used after SORT.*", + Pattern.DOTALL + ); /** * Matches "Unknown column [X]" errors, optionally followed by ", did you mean [Y]?". * This error is expected when an unmapped field is used after a schema-fixing command (KEEP, DROP, STATS) @@ -140,11 +152,12 @@ public abstract class GenerativeRestTest extends ESRestTestCase implements Query Pattern.DOTALL ); /** - * Matches "... argument of [X] must be [Y], found value [unmapped_field] type [Z]" errors. + * Matches "... argument of [X] must be [Y], found value [Z] type [T]" errors. * This happens when an unmapped field ends up with a different data type that doesn't match the one of the function's argument(s). + * The unmapped field name may appear either in the function expression(group 1) or in the found-value position (group 2). */ private static final Pattern ANY_TYPE_MISMATCH_PATTERN = Pattern.compile( - ".*argument of \\[.*] must be \\[.*], found value \\[([^]]+)] type \\[.*].*", + ".*argument of \\[([^]]+)] must be \\[.*], found value \\[([^]]+)] type \\[.*].*", Pattern.DOTALL ); /** @@ -215,7 +228,10 @@ public void run(CommandGenerator generator, CommandGenerator.CommandDescription final boolean hasException = result.exception() != null; if (hasException || checkResults(previousCommands, generator, current, previousResult, result).success() == false) { if (hasException) { - checkException(result); + List commands = new ArrayList<>(previousCommands.size() + 1); + commands.addAll(previousCommands); + commands.add(current); + checkException(result, commands); } continueExecuting = false; currentSchema = List.of(); @@ -223,6 +239,13 @@ public void run(CommandGenerator generator, CommandGenerator.CommandDescription continueExecuting = true; currentSchema = result.outputSchema(); } + if (previousCommands.isEmpty() && continueExecuting && isFromSource(current)) { + current.context() + .put( + FromGenerator.INDEX_FIELD_NAMES, + 
currentSchema.stream().map(Column::name).collect(java.util.stream.Collectors.toSet()) + ); + } previousCommands.add(current); previousResult = result; } @@ -279,6 +302,41 @@ protected CommandGenerator sourceCommand() { return EsqlQueryGenerator.sourceCommand(); } + private record FailureContext(String errorMessage, String query, List previousCommands) { + FailureContext { + previousCommands = previousCommands == null ? List.of() : previousCommands; + } + } + + private static final AllowedFailureRule[] ALLOWED_FAILURE_RULES = { ctx -> { + for (Pattern allowedError : ALLOWED_ERROR_PATTERNS) { + if (isAllowedError(ctx.errorMessage, allowedError)) { + return true; + } + } + return false; + }, + ctx -> isUnmappedFieldError(ctx.errorMessage, ctx.query), + ctx -> isScalarTypeMismatchError(ctx.errorMessage), + ctx -> isFirstLastSameFieldError(ctx.errorMessage, ctx.query), + ctx -> isForkOptimizationBugWithUnmappedFields(ctx.errorMessage, ctx.query), + ctx -> isFieldFullTextError(ctx.errorMessage, ctx.query, ctx.previousCommands), + ctx -> isFullTextAfterSampleBug(ctx.errorMessage, ctx.query), + ctx -> isFullTextAfterWhereBugs(ctx.errorMessage), + ctx -> isLenientFalseFailedToCreateFullTextQueryError(ctx.errorMessage, ctx.query), }; + + private static boolean isAllowedFailure(FailureContext ctx) { + if (ctx == null || ctx.errorMessage == null) { + return false; + } + for (AllowedFailureRule rule : ALLOWED_FAILURE_RULES) { + if (rule.matches(ctx)) { + return true; + } + } + return false; + } + protected static CommandGenerator.ValidationResult checkResults( List previousCommands, CommandGenerator commandGenerator, @@ -295,19 +353,7 @@ protected static CommandGenerator.ValidationResult checkResults( result.result() ); if (outputValidation.success() == false) { - for (Pattern allowedError : ALLOWED_ERROR_PATTERNS) { - if (isAllowedError(outputValidation.errorMessage(), allowedError)) { - return outputValidation; - } - } - if 
(isUnmappedFieldError(outputValidation.errorMessage(), result.query()) - || isScalarTypeMismatchError(outputValidation.errorMessage())) { - return outputValidation; - } - if (isFirstLastSameFieldError(outputValidation.errorMessage(), result.query())) { - return outputValidation; - } - if (isForkOptimizationBugWithUnmappedFields(outputValidation.errorMessage(), result.query())) { + if (isAllowedFailure(new FailureContext(outputValidation.errorMessage(), result.query(), previousCommands))) { return outputValidation; } fail("query: " + result.query() + "\nerror: " + outputValidation.errorMessage()); @@ -315,20 +361,8 @@ protected static CommandGenerator.ValidationResult checkResults( return outputValidation; } - protected void checkException(QueryExecuted query) { - for (Pattern allowedError : ALLOWED_ERROR_PATTERNS) { - if (isAllowedError(query.exception().getMessage(), allowedError)) { - return; - } - } - if (isUnmappedFieldError(query.exception().getMessage(), query.query()) - || isScalarTypeMismatchError(query.exception().getMessage())) { - return; - } - if (isFirstLastSameFieldError(query.exception().getMessage(), query.query())) { - return; - } - if (isForkOptimizationBugWithUnmappedFields(query.exception().getMessage(), query.query())) { + protected void checkException(QueryExecuted query, List previousCommands) { + if (isAllowedFailure(new FailureContext(query.exception().getMessage(), query.query(), previousCommands))) { return; } fail("query: " + query.query() + "\nexception: " + query.exception().getMessage()); @@ -338,57 +372,70 @@ protected void checkException(QueryExecuted query) { * Long lines in exceptions can be split across several lines. When a newline is inserted, the end of the current line and the beginning * of the new line are marked with a backslash {@code \}; the new line will also have whitespace before the backslash for aligning. 
*/ - private static final Pattern ERROR_MESSAGE_LINE_BREAK = Pattern.compile("\\\\\n\\s*\\\\"); + private static final Pattern ERROR_MESSAGE_LINE_BREAK = Pattern.compile("\\\\\r?\n\\s*\\\\"); + + private static String normalizeErrorMessage(String errorMessage) { + return ERROR_MESSAGE_LINE_BREAK.matcher(errorMessage).replaceAll(""); + } private static boolean isAllowedError(String errorMessage, Pattern allowedPattern) { - String errorWithoutLineBreaks = ERROR_MESSAGE_LINE_BREAK.matcher(errorMessage).replaceAll(""); + String errorWithoutLineBreaks = normalizeErrorMessage(errorMessage); return allowedPattern.matcher(errorWithoutLineBreaks).matches(); } /** * Checks if the error is a known unmapped field error. This covers: *
    + *
  • "[KQL|QSTR] function cannot be used after SORT" (https://github.com/elastic/elasticsearch/issues/142959)
  • + *
  • "Rule execution limit [100] reached" - can happen with complex plans involving "nullify" unmapped fields + * (https://github.com/elastic/elasticsearch/issues/142390)
  • *
  • "Unknown column [X], did you mean [Y]?" - both X and Y must be unmapped field names
  • *
  • "Unknown column [X]" (no suggestion) - X must be an unmapped field name
  • *
  • "first argument of [X] is [null] so second argument must also be [null] but was [Y]" - * the expression X must contain an unmapped field name (https://github.com/elastic/elasticsearch/issues/142115)
  • - *
  • "Rule execution limit [100] reached" - can happen with complex plans involving "nullify" unmapped fields
  • + * *
*/ private static boolean isUnmappedFieldError(String errorMessage, String query) { if (query.startsWith(SET_UNMAPPED_FIELDS_PREFIX) == false) { return false; } - String errorWithoutLineBreaks = ERROR_MESSAGE_LINE_BREAK.matcher(errorMessage).replaceAll(""); - // Try the more specific pattern first (with suggestion) - Matcher matcher = UNKNOWN_COLUMN_WITH_SUGGESTION_PATTERN.matcher(errorWithoutLineBreaks); + String errorWithoutLineBreaks = normalizeErrorMessage(errorMessage); + if (errorWithoutLineBreaks.contains("Rule execution limit [100] reached")) { + return true; + } + + Matcher matcher = FULL_TEXT_AFTER_SORT_PATTERN.matcher(errorWithoutLineBreaks); + if (matcher.matches()) { + return true; + } + + matcher = UNKNOWN_COLUMN_WITH_SUGGESTION_PATTERN.matcher(errorWithoutLineBreaks); if (matcher.matches()) { String unknownColumn = matcher.group(1); String suggestedColumn = matcher.group(2); return UNMAPPED_NAMES.contains(unknownColumn) && UNMAPPED_NAMES.contains(suggestedColumn); } - // Try the simpler pattern (no suggestion) + matcher = UNKNOWN_COLUMN_PATTERN.matcher(errorWithoutLineBreaks); if (matcher.matches()) { String unknownColumn = matcher.group(1); return UNMAPPED_NAMES.contains(unknownColumn); } - // NULL type mismatch in binary operations involving unmapped fields + matcher = NULL_TYPE_MISMATCH_PATTERN.matcher(errorWithoutLineBreaks); if (matcher.matches()) { String expression = matcher.group(1); return UNMAPPED_NAMES.stream().anyMatch(expression::contains); } - // a non-NULL type mismatch for an unmapped field name used in a function as argument + matcher = ANY_TYPE_MISMATCH_PATTERN.matcher(errorWithoutLineBreaks); if (matcher.matches()) { - String expression = matcher.group(1); - return UNMAPPED_NAMES.stream().anyMatch(expression::contains); - } - // https://github.com/elastic/elasticsearch/issues/142390 - if (errorWithoutLineBreaks.contains("Rule execution limit [100] reached")) { - return true; + String functionExpression = matcher.group(1); + String 
foundValue = matcher.group(2); + return UNMAPPED_NAMES.stream().anyMatch(name -> functionExpression.contains(name) || foundValue.contains(name)); } + return false; } @@ -398,17 +445,17 @@ private static boolean isUnmappedFieldError(String errorMessage, String query) { * These errors are acceptable since the generative tests may compose function calls with fields of these types. */ private static boolean isScalarTypeMismatchError(String errorMessage) { - String errorWithoutLineBreaks = ERROR_MESSAGE_LINE_BREAK.matcher(errorMessage).replaceAll(""); + String errorWithoutLineBreaks = normalizeErrorMessage(errorMessage); return SCALAR_TYPE_MISMATCH_PATTERN.matcher(errorWithoutLineBreaks).matches(); } /** - * Checks if the error is an {@code ArrayIndexOutOfBoundsException} caused by calling FIRST or LAST - * with problematic arguments. - * See #142180 + * Checks if the error is an {@code ArrayIndexOutOfBoundsException} caused by calling FIRST or LAST with problematic arguments. + * See https://github.com/elastic/elasticsearch/issues/142180 */ private static boolean isFirstLastSameFieldError(String errorMessage, String query) { - if (errorMessage.contains("out of bounds for length") == false) { + String errorWithoutLineBreaks = normalizeErrorMessage(errorMessage); + if (errorWithoutLineBreaks.contains("out of bounds for length") == false) { return false; } if (FIRST_LAST_NULL_ARG_PATTERN.matcher(query).find()) { @@ -429,14 +476,147 @@ private static boolean isFirstLastSameFieldError(String errorMessage, String que ); /** - * When {@code SET unmapped_fields="nullify"} is used, the _fork reference can go missing - * during plan optimization. - * See #142762 + * When {@code SET unmapped_fields="nullify"} is used, the _fork reference can go missing during plan optimization. 
+ * https://github.com/elastic/elasticsearch/issues/142762 */ static boolean isForkOptimizationBugWithUnmappedFields(String errorMessage, String query) { - return query.startsWith(SET_UNMAPPED_FIELDS_PREFIX) && FORK_OPTIMIZED_INCORRECTLY_PATTERN.matcher(errorMessage).matches(); + String errorWithoutLineBreaks = normalizeErrorMessage(errorMessage); + return query.startsWith(SET_UNMAPPED_FIELDS_PREFIX) && FORK_OPTIMIZED_INCORRECTLY_PATTERN.matcher(errorWithoutLineBreaks).matches(); } + private static final Pattern NOT_A_FIELD_FROM_INDEX_PATTERN = Pattern.compile( + ".*cannot operate on \\[([^]]+)\\], which is not a field from an index mapping.*", + Pattern.DOTALL + ); + + /** + * Captures fields created by GROK patterns, e.g. {@code %{WORD:foo}} or {@code %{NUMBER:bar:int}}. + */ + private static final Pattern GROK_GENERATED_FIELD_PATTERN = Pattern.compile("%\\{[^:}]+:([^}:]+)(?::[^}]+)?}"); + /** + * Captures fields created by DISSECT patterns, e.g. {@code %{foo}}. Ignores skip fields like {@code %{?}} or {@code %{?skip}}. + */ + private static final Pattern DISSECT_GENERATED_FIELD_PATTERN = Pattern.compile("%\\{([^}]+)}"); + + private static final Pattern MV_EXPAND_FIELD_PATTERN = Pattern.compile("(?i)\\|\\s*mv_expand\\s+`?([^`|\\s]+)`?"); + + private static final Pattern RENAME_NEW_FIELD_PATTERN = Pattern.compile("(?i)\\bas\\s+(`[^`]+`|[^,|\\s]+)"); + + /** + * Checks if the error is a full-text function/operator rejecting a field that is not a FieldAttribute from an index mapping. It covers: + *
    + *
  • Fields added by an ENRICH command (enrich fields)
  • + *
  • Fields expanded by MV_EXPAND (the expanded fields)
  • + *
  • Fields created by GROK or DISSECT (the "extracted" fields)
  • + *
  • Fields renamed via RENAME
  • + *
+ * The error is allowed only when the offending field can be traced back to one of these commands. + */ + static boolean isFieldFullTextError(String errorMessage, String query, List previousCommands) { + String errorWithoutLineBreaks = normalizeErrorMessage(errorMessage); + Matcher m = NOT_A_FIELD_FROM_INDEX_PATTERN.matcher(errorWithoutLineBreaks); + if (m.matches() == false) { + return false; + } + String lowerQuery = query.toLowerCase(java.util.Locale.ROOT); + if (lowerQuery.contains("| enrich ") || lowerQuery.startsWith("enrich ")) { + return true; + } + // see https://github.com/elastic/elasticsearch/issues/142713 + String fieldName = EsqlQueryGenerator.unquote(m.group(1)); + Matcher mvMatcher = MV_EXPAND_FIELD_PATTERN.matcher(query); + while (mvMatcher.find()) { + if (EsqlQueryGenerator.unquote(mvMatcher.group(1)).equals(fieldName)) { + return true; + } + } + for (var previous : previousCommands) { + String name = previous.commandName(); + if (name == null) { + continue; + } + name = name.toLowerCase(java.util.Locale.ROOT); + if ("grok".equals(name)) { + Matcher gm = GROK_GENERATED_FIELD_PATTERN.matcher(previous.commandString()); + while (gm.find()) { + if (EsqlQueryGenerator.unquote(gm.group(1)).equals(fieldName)) { + return true; + } + } + } else if ("dissect".equals(name)) { + Matcher dm = DISSECT_GENERATED_FIELD_PATTERN.matcher(previous.commandString()); + while (dm.find()) { + String generated = dm.group(1); + if (generated.startsWith("?")) { + continue; + } + if (EsqlQueryGenerator.unquote(generated).equals(fieldName)) { + return true; + } + } + } else if ("rename".equals(name)) { + Matcher rm = RENAME_NEW_FIELD_PATTERN.matcher(previous.commandString()); + while (rm.find()) { + if (EsqlQueryGenerator.unquote(rm.group(1).trim()).equals(fieldName)) { + return true; + } + } + } + } + return false; + } + + /** + * SAMPLE should not block QSTR/KQL when it appears after the WHERE containing them, but currently it does. 
+ * See https://github.com/elastic/elasticsearch/issues/142694 + */ + static boolean isFullTextAfterSampleBug(String errorMessage, String query) { + return FULL_TEXT_AFTER_SAMPLE_PATTERN.matcher(normalizeErrorMessage(errorMessage)).matches() + && query.toLowerCase(java.util.Locale.ROOT).contains("| sample"); + } + + private static final Pattern FULL_TEXT_AFTER_SAMPLE_PATTERN = Pattern.compile( + ".*\\[(KQL|QSTR)] function cannot be used after SAMPLE.*", + Pattern.DOTALL + ); + + /** + * See https://github.com/elastic/elasticsearch/issues/142705 + * See https://github.com/elastic/elasticsearch/issues/142710 + */ + static boolean isFullTextAfterWhereBugs(String errorMessage) { + String errorWithoutLineBreaks = normalizeErrorMessage(errorMessage); + return FULL_TEXT_AFTER_WHERE_PATTERN.matcher(errorWithoutLineBreaks).matches(); + } + + private static final Pattern FULL_TEXT_AFTER_WHERE_PATTERN = Pattern.compile( + ".*(?:(?:\\[(?:KQL|QSTR|MATCH|MultiMatch)] function)|(?:\\[:\\] operator)) cannot be used after \\(?WHERE.*", + Pattern.DOTALL + ); + + /** + * Work around a query-building failure in full-text functions when options include {@code {"lenient": false}}. 
+ */ + static boolean isLenientFalseFailedToCreateFullTextQueryError(String errorMessage, String query) { + String errorWithoutLineBreaks = normalizeErrorMessage(errorMessage); + if (errorWithoutLineBreaks.contains("failed to create query: For input string") == false) { + return false; + } + return MULTI_MATCH_LENIENT_FALSE_PATTERN.matcher(query).find() + || MATCH_LENIENT_FALSE_PATTERN.matcher(query).find() + || QSTR_LENIENT_FALSE_PATTERN.matcher(query).find(); + } + + private static final Pattern MULTI_MATCH_LENIENT_FALSE_PATTERN = Pattern.compile( + "(?i)\\bmulti_match\\s*\\([^)]*\\{[^}]*[\"']lenient[\"']\\s*:\\s*false[^}]*}[^)]*\\)" + ); + private static final Pattern MATCH_LENIENT_FALSE_PATTERN = Pattern.compile( + "(?i)\\bmatch\\s*\\([^)]*\\{[^}]*[\"']lenient[\"']\\s*:\\s*false[^}]*}[^)]*\\)" + ); + private static final Pattern QSTR_LENIENT_FALSE_PATTERN = Pattern.compile( + "(?i)\\bqstr\\s*\\([^)]*\\{[^}]*[\"']lenient[\"']\\s*:\\s*false[^}]*}[^)]*\\)" + ); + @Override @SuppressWarnings("unchecked") public QueryExecuted execute(String query, int depth) { @@ -502,4 +682,8 @@ private List lookupIndices() { result.add(new LookupIdx("multi_column_joinable_lookup", multiColumnJoinableLookupKeys)); return result; } + + private interface AllowedFailureRule { + boolean matches(FailureContext ctx); + } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/EsqlQueryGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/EsqlQueryGenerator.java index cde8bd07259b0..a5714f49a4286 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/EsqlQueryGenerator.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/EsqlQueryGenerator.java @@ -36,8 +36,8 @@ import org.elasticsearch.xpack.esql.parser.ParserUtils; import java.util.Collection; +import java.util.HashMap; import java.util.List; -import 
java.util.Map; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -48,6 +48,7 @@ import static org.elasticsearch.test.ESTestCase.randomFrom; import static org.elasticsearch.test.ESTestCase.randomIntBetween; import static org.elasticsearch.test.ESTestCase.randomLongBetween; +import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.COMMONLY_SUPPORTED_TYPES; import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.areUnmappedFieldsAllowed; import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.binaryMathFunction; import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.caseFunction; @@ -58,6 +59,7 @@ import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.conversionFunction; import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.dateDiffFunction; import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.dateFunction; +import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.fullTextFunction; import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.greatestLeastFunction; import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.inExpression; import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.ipPrefixFunction; @@ -71,6 +73,7 @@ import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.stringFunction; import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.stringToBoolFunction; import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.stringToIntFunction; +import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.typeSafeExpression; import static org.elasticsearch.xpack.esql.generator.command.pipe.KeepGenerator.randomUnmappedFieldName; public class EsqlQueryGenerator { @@ -170,7 +173,7 @@ public static void generatePipeline( // do a dummy query to get available fields first // TODO: modify when METRICS_INFO available 
https://github.com/elastic/elasticsearch/issues/141413 String index = promQLGenerator.generateIndices(schema); - var fromDesc = new CommandGenerator.CommandDescription("from", FromGenerator.INSTANCE, "FROM " + index, Map.of()); + var fromDesc = new CommandGenerator.CommandDescription("from", FromGenerator.INSTANCE, "FROM " + index, new HashMap<>()); executor.run(FromGenerator.INSTANCE, fromDesc); executor.clearCommandHistory(); desc = promQLGenerator.generateWithIndices(List.of(), executor.currentSchema(), schema, queryExecutor, index); @@ -217,15 +220,6 @@ public static void generatePipeline( } } - /** - * Generates a boolean expression. - * @deprecated Use {@link #booleanExpression(List, List)} instead to properly handle unmapped fields - */ - @Deprecated - public static String booleanExpression(List previousOutput) { - return booleanExpression(previousOutput, null); - } - /** * Generates a boolean expression. * @param previousOutput the columns available in the current schema @@ -233,7 +227,7 @@ public static String booleanExpression(List previousOutput) { */ public static String booleanExpression(List previousOutput, List previousCommands) { boolean allowUnmapped = areUnmappedFieldsAllowed(previousCommands); - return switch (randomIntBetween(0, 11)) { + return switch (randomIntBetween(0, 13)) { case 0, 1, 2 -> { String field = randomNumericField(previousOutput); if (field == null) { @@ -249,6 +243,7 @@ public static String booleanExpression(List previousOutput, List likeExpression(previousOutput, allowUnmapped); case 9 -> rlikeExpression(previousOutput, allowUnmapped); case 10 -> cidrMatchFunction(previousOutput, allowUnmapped); + case 11, 12 -> fullTextFunction(previousOutput, previousCommands); default -> { // Numeric comparison on function result String funcExpr = stringToIntFunction(previousOutput, allowUnmapped); @@ -501,14 +496,14 @@ public static String agg(List previousOutput, List { // top() accepts: boolean, double, integer, long, date, ip, keyword, 
text Set topTypes = Set.of("boolean", "double", "integer", "long", "date", "datetime", "ip", "keyword", "text"); - String topField = FunctionGenerator.typeSafeExpression(previousOutput, topTypes, allowUnmapped); + String topField = typeSafeExpression(previousOutput, topTypes, allowUnmapped); if (topField == null) topField = anyName; String order = randomIntBetween(0, 1) == 0 ? "asc" : "desc"; yield "top(" + topField + ", " + randomIntBetween(1, 5) + ", \"" + order + "\")"; } case 8 -> { // sample() - use a commonly supported field to avoid type issues - String sampleField = randomName(previousOutput, FunctionGenerator.COMMONLY_SUPPORTED_TYPES); + String sampleField = randomName(previousOutput, COMMONLY_SUPPORTED_TYPES); if (sampleField == null) sampleField = anyName; yield "sample(" + sampleField + ", " + randomIntBetween(1, 10) + ")"; } @@ -657,7 +652,7 @@ public static String functionExpression(List previousOutput, List greatestLeastFunction(previousOutput, allowUnmapped); case 14 -> mvSliceZipFunction(previousOutput, allowUnmapped); case 15 -> splitFunction(previousOutput, allowUnmapped); - case 16 -> clampFunction(previousOutput, allowUnmapped); + case 16 -> clampFunction(previousOutput); case 17 -> dateDiffFunction(previousOutput, allowUnmapped); default -> ipPrefixFunction(previousOutput, allowUnmapped); }; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/FunctionGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/FunctionGenerator.java index 142f7fbc2e4af..59dc96fb72fe6 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/FunctionGenerator.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/FunctionGenerator.java @@ -9,53 +9,43 @@ import org.elasticsearch.xpack.esql.generator.command.CommandGenerator; import 
org.elasticsearch.xpack.esql.generator.command.source.FromGenerator; +import org.elasticsearch.xpack.esql.generator.function.BooleanExpressionGenerator; +import org.elasticsearch.xpack.esql.generator.function.ConditionalFunctionGenerator; +import org.elasticsearch.xpack.esql.generator.function.DateFunctionGenerator; +import org.elasticsearch.xpack.esql.generator.function.FullTextFunctionGenerator; +import org.elasticsearch.xpack.esql.generator.function.IpFunctionGenerator; +import org.elasticsearch.xpack.esql.generator.function.MathFunctionGenerator; +import org.elasticsearch.xpack.esql.generator.function.MvFunctionGenerator; +import org.elasticsearch.xpack.esql.generator.function.StringFunctionGenerator; +import org.elasticsearch.xpack.esql.generator.function.TypeConversionFunctionGenerator; +import org.elasticsearch.xpack.esql.generator.function.TypeSafeExpressionGenerator; -import java.util.ArrayList; import java.util.List; import java.util.Set; -import java.util.function.Supplier; -import java.util.stream.Collectors; -import static org.elasticsearch.test.ESTestCase.randomAlphaOfLength; -import static org.elasticsearch.test.ESTestCase.randomBoolean; -import static org.elasticsearch.test.ESTestCase.randomFrom; import static org.elasticsearch.test.ESTestCase.randomIntBetween; -import static org.elasticsearch.xpack.esql.generator.command.pipe.KeepGenerator.randomUnmappedFieldName; /** * Generates random ESQL function expressions for testing. * This generator creates expressions for scalar functions across different categories: - * math, string, date, type conversion, conditional, and multivalue functions. + * math, string, date, type conversion, conditional, multivalue, ip, boolean expressions, and full-text. *

- * Some functions will randomly use unmapped field names (from {@link org.elasticsearch.xpack.esql.generator.command.pipe.KeepGenerator#UNMAPPED_FIELD_NAMES}) + * Some functions will randomly use unmapped field names (from + * {@link org.elasticsearch.xpack.esql.generator.command.pipe.KeepGenerator#UNMAPPED_FIELD_NAMES}) * to test how functions handle the NULL data type that comes from the "unmapped fields" functionality. *

* Note: Unmapped fields can only be used before schema-fixing commands (STATS, KEEP, DROP) are encountered. * After these commands, the field list is fixed and new unmapped fields cannot be introduced. + *

+ * This class is a facade over category-specific generators in {@code org.elasticsearch.xpack.esql.generator.function}. + * Callers should use this class so we can evolve the underlying generators without churning imports across the codebase. */ public class FunctionGenerator { - /** - * Probability (0-100) of using an unmapped field name instead of a real field. - * This tests how functions handle NULL data type from unmapped fields. - */ - private static final int UNMAPPED_FIELD_PROBABILITY = 10; - - /** - * Command names that fix the schema - after these commands, no new unmapped fields can be introduced. - */ - private static final Set SCHEMA_FIXING_COMMANDS = Set.of("stats", "keep", "drop"); - /** * Types that are commonly supported across most scalar and aggregate functions. - * Functions like coalesce() can produce expressions of any type, but when those expressions - * are used as arguments to other functions (like top(), greatest(), etc.), the type must - * be compatible. Restricting to these types avoids type errors when composing function calls. - *

- * Notably excludes: date_range, geo_point, geo_shape, cartesian_point, cartesian_shape, - * histogram, unsigned_long, aggregate_metric_double, and other rare types. */ - static final Set COMMONLY_SUPPORTED_TYPES = Set.of( + public static final Set COMMONLY_SUPPORTED_TYPES = Set.of( "integer", "long", "double", @@ -69,56 +59,23 @@ public class FunctionGenerator { ); /** - * Types that are NOT accepted by most scalar functions. These are special metric/internal types - * that should be excluded when selecting fields for general-purpose function arguments. - *

- * Most scalar functions (mv_slice, mv_count, to_string, etc.) reject these types with errors like: - * "must be [any type except counter types, dense_vector, aggregate_metric_double, ...]" + * Probability (0-100) of using an unmapped field name instead of a real field. + * This tests how functions handle NULL data type from unmapped fields. */ - private static final Set SCALAR_UNSUPPORTED_TYPES = Set.of( - "counter_long", - "counter_double", - "counter_integer", - "aggregate_metric_double", - "dense_vector", - "tdigest", - "histogram", - "exponential_histogram", - "date_range" - ); + private static final int UNMAPPED_FIELD_PROBABILITY = 10; /** - * Returns a field name suitable for use as a scalar function argument. - * Excludes types that are rejected by most scalar functions (counter types, aggregate_metric_double, etc.). - * - * @param columns the available columns - * @return a field name of a type accepted by most scalar functions, or null if none available + * Command names that fix the schema - after these commands, no new unmapped fields can be introduced. */ - static String randomScalarField(List columns) { - List suitable = columns.stream().filter(c -> SCALAR_UNSUPPORTED_TYPES.contains(c.type()) == false).toList(); - if (suitable.isEmpty()) { - return null; - } - return EsqlQueryGenerator.randomName(suitable); - } + private static final Set SCHEMA_FIXING_COMMANDS = Set.of("stats", "keep", "drop"); /** * Checks if unmapped fields are allowed based on the command history. - * Unmapped fields require two conditions to be met: - *

    - *
  1. The SET unmapped_fields="nullify" directive must be present in the FROM command
  2. - *
  3. No schema-fixing commands (STATS, KEEP, DROP) must have been encountered yet, - * since those commands fix the field list and new unmapped fields cannot be introduced after them
  4. - *
- * - * @param previousCommands the list of previous commands in the query - * @return true if unmapped fields can be used, false otherwise */ public static boolean areUnmappedFieldsAllowed(List previousCommands) { if (previousCommands == null) { return false; } - // Check if SET unmapped_fields="nullify" was included in the FROM command if (isUnmappedFieldsEnabled(previousCommands) == false) { return false; } @@ -138,738 +95,230 @@ public static boolean isUnmappedFieldsEnabled(List 0 && probabilityIncrease < 10 : "Probability increase should be in interval [1, 9]"; return randomIntBetween(0, 100) < UNMAPPED_FIELD_PROBABILITY * probabilityIncrease; } - /** - * Returns a field name, with some probability returning an unmapped field name instead. - * - * @param realField the real field to use if not using unmapped - * @param allowUnmapped if false, never returns an unmapped field - * @return either the unmapped field name or the real field - */ - private static String fieldOrUnmapped(String realField, boolean allowUnmapped) { - if (realField == null) { - return null; - } - String unmapped = maybeUnmappedField(allowUnmapped); - return unmapped != null ? unmapped : realField; - } - - // ========== MATH FUNCTIONS ========== + // ========= Math ========= /** * Generates a math function that takes a numeric argument and returns a numeric value. * May randomly use unmapped field names to test NULL data type handling. 
- * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String mathFunction(List columns, boolean allowUnmapped) { - String numericField = fieldOrUnmapped(EsqlQueryGenerator.randomNumericField(columns), allowUnmapped); - if (numericField == null) { - // Fall back to unmapped field if no numeric fields available - numericField = maybeUnmappedField(allowUnmapped); - if (numericField == null) { - return null; - } - } - return randomFrom( - // Unary math functions - "abs(" + numericField + ")", - "ceil(" + numericField + ")", - "floor(" + numericField + ")", - "signum(" + numericField + ")", - "sqrt(abs(" + numericField + "))", // abs to avoid negative sqrt - "cbrt(" + numericField + ")", - "exp(" + numericField + " % 10)", // mod to avoid overflow - "log10(abs(" + numericField + ") + 1)", // +1 to avoid log(0) - "round(" + numericField + ")", - "round(" + numericField + ", " + randomIntBetween(0, 5) + ")", - // Trigonometric functions - "sin(" + numericField + ")", - "cos(" + numericField + ")", - "tan(" + numericField + ")", - "asin(" + numericField + " % 1)", // mod 1 to keep in [-1,1] - "acos(" + numericField + " % 1)", - "atan(" + numericField + ")", - "sinh(" + numericField + " % 10)", - "cosh(" + numericField + " % 10)", - "tanh(" + numericField + ")", - // Constants - "pi()", - "e()", - "tau()" - ); + return MathFunctionGenerator.mathFunction(columns, allowUnmapped); } /** * Generates a binary math function. * May randomly use unmapped field names to test NULL data type handling. - * Note: greatest/least are handled separately in greatestLeastFunction to ensure type compatibility. 
- * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String binaryMathFunction(List columns, boolean allowUnmapped) { - String field1 = fieldOrUnmapped(EsqlQueryGenerator.randomNumericField(columns), allowUnmapped); - String field2 = fieldOrUnmapped(EsqlQueryGenerator.randomNumericField(columns), allowUnmapped); - if (field1 == null || field2 == null) { - return null; - } - return randomFrom( - "pow(" + field1 + ", 2)", - "pow(" + field1 + ", abs(" + field2 + ") % 5 + 1)", - "log(abs(" + field1 + ") + 1, abs(" + field2 + ") + 2)", - "atan2(" + field1 + ", " + field2 + ")", - "hypot(" + field1 + ", " + field2 + ")", - "copy_sign(" + field1 + ", " + field2 + ")", - "scalb(" + field1 + ", " + randomIntBetween(-5, 5) + ")" - ); + return MathFunctionGenerator.binaryMathFunction(columns, allowUnmapped); } /** * Generates a clamp function (clamp, clamp_min, clamp_max). - * Note: clamp/clamp_min/clamp_max do NOT accept NULL for the field parameter, - * so unmapped fields (which resolve to NULL type) must not be used here. - * - * @param columns the available columns - * @param allowUnmapped ignored for the field parameter since clamp rejects NULL fields + * Unmapped fields (NULL type) are not allowed for clamp's field argument. 
*/ - public static String clampFunction(List columns, boolean allowUnmapped) { - // clamp/clamp_min/clamp_max reject NULL for the field parameter, so don't use unmapped fields - String numericField = EsqlQueryGenerator.randomNumericField(columns); - if (numericField == null) { - return null; - } - int min = randomIntBetween(-100, 50); - int max = min + randomIntBetween(1, 100); - return randomFrom( - "clamp(" + numericField + ", " + min + ", " + max + ")", - "clamp_min(" + numericField + ", " + min + ")", - "clamp_max(" + numericField + ", " + max + ")" - ); + public static String clampFunction(List columns) { + return MathFunctionGenerator.clampFunction(columns); } - // ========== STRING FUNCTIONS ========== + // ========= String ========= /** * Generates a string function that returns a string. * May randomly use unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String stringFunction(List columns, boolean allowUnmapped) { - String stringField = fieldOrUnmapped(EsqlQueryGenerator.randomStringField(columns), allowUnmapped); - if (stringField == null) { - return null; - } - return randomFrom( - "to_lower(" + stringField + ")", - "to_upper(" + stringField + ")", - "trim(" + stringField + ")", - "ltrim(" + stringField + ")", - "rtrim(" + stringField + ")", - "reverse(" + stringField + ")", - "left(" + stringField + ", " + randomIntBetween(1, 10) + ")", - "right(" + stringField + ", " + randomIntBetween(1, 10) + ")", - "substring(" + stringField + ", " + randomIntBetween(0, 5) + ", " + randomIntBetween(1, 10) + ")", - "repeat(" + stringField + ", " + randomIntBetween(1, 3) + ")", - "space(" + randomIntBetween(0, 10) + ")", - "replace(" + stringField + ", \"a\", \"b\")", - "md5(" + stringField + ")", - "sha1(" + stringField + ")", - "sha256(" + stringField + ")", - "to_base64(" + stringField + ")", - "from_base64(to_base64(" + stringField + 
"))", - "url_encode(" + stringField + ")", - "url_decode(" + stringField + ")" - ); + return StringFunctionGenerator.stringFunction(columns, allowUnmapped); } /** * Generates a string function that returns an integer (length-like functions). * May randomly use unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String stringToIntFunction(List columns, boolean allowUnmapped) { - String stringField = fieldOrUnmapped(EsqlQueryGenerator.randomStringField(columns), allowUnmapped); - if (stringField == null) { - return null; - } - return randomFrom( - "length(" + stringField + ")", - "bit_length(" + stringField + ")", - "byte_length(" + stringField + ")", - "locate(" + stringField + ", \"a\")", - "locate(" + stringField + ", \"a\", " + randomIntBetween(0, 5) + ")" - ); + return StringFunctionGenerator.stringToIntFunction(columns, allowUnmapped); } /** * Generates a string function that returns a boolean. * May randomly use unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String stringToBoolFunction(List columns, boolean allowUnmapped) { - String stringField = fieldOrUnmapped(EsqlQueryGenerator.randomStringField(columns), allowUnmapped); - if (stringField == null) { - return null; - } - String searchStr = "\"" + randomAlphaOfLength(randomIntBetween(1, 3)) + "\""; - return randomFrom( - "starts_with(" + stringField + ", " + searchStr + ")", - "ends_with(" + stringField + ", " + searchStr + ")", - "contains(" + stringField + ", " + searchStr + ")" - ); + return StringFunctionGenerator.stringToBoolFunction(columns, allowUnmapped); } /** * Generates a concat function with multiple arguments. * May randomly include unmapped field names to test NULL data type handling. 
- * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String concatFunction(List columns, boolean allowUnmapped) { - List stringFields = columns.stream() - .filter(c -> c.type().equals("keyword") || c.type().equals("text")) - .map(c -> EsqlQueryGenerator.needsQuoting(c.name()) ? EsqlQueryGenerator.quote(c.name()) : c.name()) - .limit(randomIntBetween(2, 4)) - .collect(Collectors.toList()); - if (stringFields.isEmpty()) { - return null; - } - // Possibly add an unmapped field to the concat arguments - if (allowUnmapped && shouldAddUnmappedField()) { - stringFields.add(randomUnmappedFieldName()); - } - if (stringFields.size() < 2) { - return null; - } - return "concat(" + String.join(", ", stringFields) + ")"; + return StringFunctionGenerator.concatFunction(columns, allowUnmapped); } /** * Generates a split function. * May randomly use unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String splitFunction(List columns, boolean allowUnmapped) { - String stringField = fieldOrUnmapped(EsqlQueryGenerator.randomStringField(columns), allowUnmapped); - if (stringField == null) { - return null; - } - String delimiter = randomFrom(",", " ", "-", "_", ":"); - return "split(" + stringField + ", \"" + delimiter + "\")"; + return StringFunctionGenerator.splitFunction(columns, allowUnmapped); } - // ========== DATE FUNCTIONS ========== + // ========= Date ========= /** * Generates a date function. * May randomly use unmapped field names to test NULL data type handling. 
- * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String dateFunction(List columns, boolean allowUnmapped) { - String dateField = fieldOrUnmapped(EsqlQueryGenerator.randomName(columns, Set.of("date", "datetime")), allowUnmapped); - if (dateField == null) { - return null; - } - String datePart = randomFrom( - "YEAR", - "MONTH_OF_YEAR", - "DAY_OF_MONTH", - "HOUR_OF_DAY", - "MINUTE_OF_HOUR", - "SECOND_OF_MINUTE", - "DAY_OF_WEEK", - "DAY_OF_YEAR", - "ALIGNED_WEEK_OF_YEAR" - ); - String interval = randomFrom("1 day", "1 hour", "1 week", "1 month", "1 year"); - return randomFrom( - "date_extract(\"" + datePart + "\", " + dateField + ")", - "date_trunc(" + interval + ", " + dateField + ")", - "date_format(\"yyyy-MM-dd\", " + dateField + ")", - "day_name(" + dateField + ")", - "month_name(" + dateField + ")", - "now()" - ); + return DateFunctionGenerator.dateFunction(columns, allowUnmapped); } /** * Generates a date_diff function. * May randomly use unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String dateDiffFunction(List columns, boolean allowUnmapped) { - List dateFields = columns.stream() - .filter(c -> c.type().equals("date") || c.type().equals("datetime")) - .map(c -> EsqlQueryGenerator.needsQuoting(c.name()) ? 
EsqlQueryGenerator.quote(c.name()) : c.name()) - .collect(Collectors.toList()); - // Possibly add an unmapped field - if (allowUnmapped && shouldAddUnmappedField()) { - dateFields.add(randomUnmappedFieldName()); - } - if (dateFields.size() < 2) { - return null; - } - String unit = randomFrom("second", "minute", "hour", "day", "week", "month", "year"); - return "date_diff(\"" + unit + "\", " + dateFields.get(0) + ", " + dateFields.get(1) + ")"; + return DateFunctionGenerator.dateDiffFunction(columns, allowUnmapped); } - // ========== TYPE CONVERSION FUNCTIONS ========== + // ========= Type conversion ========= /** * Generates a type conversion function. * May randomly use unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String conversionFunction(List columns, boolean allowUnmapped) { - // Occasionally use unmapped field for conversion functions - String unmapped = maybeUnmappedField(allowUnmapped); - if (unmapped != null) { - return randomFrom( - "to_string(" + unmapped + ")", - "to_integer(" + unmapped + ")", - "to_long(" + unmapped + ")", - "to_double(" + unmapped + ")" - ); - } - - // to_string - works on most types (but not counter types, aggregate_metric_double, etc.) 
- String anyField = randomScalarField(columns); - if (anyField != null && randomBoolean()) { - return "to_string(" + fieldOrUnmapped(anyField, allowUnmapped) + ")"; - } - - // Numeric conversions - String numericField = EsqlQueryGenerator.randomNumericField(columns); - if (numericField != null) { - return randomFrom( - "to_integer(" + numericField + ")", - "to_long(" + numericField + ")", - "to_double(" + numericField + ")", - "to_string(" + numericField + ")", - "to_degrees(" + numericField + ")", - "to_radians(" + numericField + ")" - ); - } - - // String to various types - String stringField = EsqlQueryGenerator.randomStringField(columns); - if (stringField != null) { - return randomFrom("to_string(" + stringField + ")", "to_lower(" + stringField + ")"); - } - - return null; + return TypeConversionFunctionGenerator.conversionFunction(columns, allowUnmapped); } - // ========== CONDITIONAL FUNCTIONS ========== + // ========= Conditional ========= /** * Generates a CASE expression. * May randomly use unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String caseFunction(List columns, boolean allowUnmapped) { - String numericField = fieldOrUnmapped(EsqlQueryGenerator.randomNumericField(columns), allowUnmapped); - if (numericField == null) { - return null; - } - int threshold = randomIntBetween(0, 100); - return "case(" + numericField + " > " + threshold + ", \"high\", \"low\")"; + return ConditionalFunctionGenerator.caseFunction(columns, allowUnmapped); } /** * Generates a COALESCE expression. - * IMPORTANT: All arguments must be of the same type. COALESCE does NOT do type coercion. - * Only uses columns with commonly supported types to ensure the result can be consumed - * by other functions (e.g. top(), greatest/least, aggregation functions). - * May randomly include unmapped field names to test NULL data type handling. 
- * This is especially useful for coalesce since it's designed to handle nulls. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names + * All arguments must be of the same type (no implicit type coercion). */ public static String coalesceFunction(List columns, boolean allowUnmapped) { - // COALESCE requires all arguments to be the SAME type - // Only use commonly supported types so the result type is compatible with other functions - var columnsByType = columns.stream() - .filter(c -> COMMONLY_SUPPORTED_TYPES.contains(c.type())) - .collect(Collectors.groupingBy(Column::type)); - - // Find a type that has at least one field - List sameTypeColumns = null; - for (var entry : columnsByType.entrySet()) { - if (entry.getValue().isEmpty() == false) { - sameTypeColumns = entry.getValue(); - break; - } - } - - if (sameTypeColumns == null || sameTypeColumns.isEmpty()) { - return null; - } - - String field1Raw = sameTypeColumns.get(randomIntBetween(0, sameTypeColumns.size() - 1)).name(); - String field1 = EsqlQueryGenerator.needsQuoting(field1Raw) ? EsqlQueryGenerator.quote(field1Raw) : field1Raw; - - // Coalesce is perfect for testing unmapped fields - it handles nulls by design - // Use unmapped field as first argument (will be null, so second arg is returned) - if (allowUnmapped && shouldAddUnmappedFieldWithProbabilityIncrease(2)) { - String unmapped = randomUnmappedFieldName(); - return "coalesce(" + unmapped + ", " + field1 + ")"; - } - - // Pick a second field of the same type - if (sameTypeColumns.size() >= 2) { - String field2Raw = sameTypeColumns.get(randomIntBetween(0, sameTypeColumns.size() - 1)).name(); - String field2 = EsqlQueryGenerator.needsQuoting(field2Raw) ? 
EsqlQueryGenerator.quote(field2Raw) : field2Raw; - if (field1.equals(field2) == false) { - return "coalesce(" + field1 + ", " + field2 + ")"; - } - } - - // Fallback: use null literal as second argument (always valid) - return "coalesce(" + field1 + ", null)"; + return ConditionalFunctionGenerator.coalesceFunction(columns, allowUnmapped); } /** * Generates a GREATEST or LEAST expression. - * IMPORTANT: All arguments must be of the same type. These functions do NOT do type coercion. - * May randomly include unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names + * All arguments must be of the same type (no implicit type coercion). */ public static String greatestLeastFunction(List columns, boolean allowUnmapped) { - // GREATEST/LEAST require all arguments to be the SAME type - pick one type and stick with it - String targetType = randomFrom("integer", "long", "double"); - List sameTypeFields = columns.stream() - .filter(c -> c.type().equals(targetType)) - .map(c -> EsqlQueryGenerator.needsQuoting(c.name()) ? EsqlQueryGenerator.quote(c.name()) : c.name()) - .collect(Collectors.toList()); - - // Possibly add an unmapped field (which has NULL type, accepted by these functions) - if (allowUnmapped && shouldAddUnmappedField() && sameTypeFields.isEmpty() == false) { - sameTypeFields.add(randomUnmappedFieldName()); - } - - if (sameTypeFields.size() < 2) { - // Not enough fields of the same type, try with constants of a consistent type - String numericField = EsqlQueryGenerator.randomNumericField(columns); - if (numericField != null) { - // Use the same field multiple times with different constant comparisons - String func = randomBoolean() ? 
"greatest" : "least"; - int val1 = randomIntBetween(-100, 100); - int val2 = randomIntBetween(-100, 100); - return func + "(" + numericField + ", " + val1 + ", " + val2 + ")"; - } - return null; - } - - String func = randomBoolean() ? "greatest" : "least"; - int numArgs = Math.min(sameTypeFields.size(), randomIntBetween(2, 4)); - return func + "(" + String.join(", ", sameTypeFields.subList(0, numArgs)) + ")"; + return ConditionalFunctionGenerator.greatestLeastFunction(columns, allowUnmapped); } - // ========== MULTIVALUE FUNCTIONS ========== + // ========= Multivalue ========= /** * Generates a multivalue function. * May randomly use unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String mvFunction(List columns, boolean allowUnmapped) { - // Use randomScalarField to avoid counter types, aggregate_metric_double, etc. - String anyField = fieldOrUnmapped(randomScalarField(columns), allowUnmapped); - if (anyField == null) { - // Fall back to just unmapped field - anyField = maybeUnmappedField(allowUnmapped); - if (anyField == null) { - return null; - } - } - - // Functions that work on any type - String genericMvFunc = randomFrom( - "mv_count(" + anyField + ")", - "mv_first(" + anyField + ")", - "mv_last(" + anyField + ")", - "mv_dedupe(" + anyField + ")" - ); - - String numericField = fieldOrUnmapped(EsqlQueryGenerator.randomNumericField(columns), allowUnmapped); - if (numericField != null && randomBoolean()) { - return randomFrom( - "mv_min(" + numericField + ")", - "mv_max(" + numericField + ")", - "mv_avg(" + numericField + ")", - "mv_sum(" + numericField + ")", - "mv_median(" + numericField + ")" - ); - } - - String stringField = fieldOrUnmapped(EsqlQueryGenerator.randomStringField(columns), allowUnmapped); - if (stringField != null && randomBoolean()) { - return randomFrom( - "mv_concat(" + stringField + ", \", \")", - "mv_sort(" + 
stringField + ")", - "mv_sort(" + stringField + ", \"desc\")" - ); - } - - return genericMvFunc; + return MvFunctionGenerator.mvFunction(columns, allowUnmapped); } /** - * Generates mv_slice or mv_zip functions. + * Generates mv_slice functions. * May randomly use unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String mvSliceZipFunction(List columns, boolean allowUnmapped) { - // Use randomScalarField to avoid counter types, aggregate_metric_double, etc. - String field = fieldOrUnmapped(randomScalarField(columns), allowUnmapped); - if (field == null) { - return null; - } - int start = randomIntBetween(0, 3); - int end = start + randomIntBetween(1, 5); - return randomFrom("mv_slice(" + field + ", " + start + ", " + end + ")", "mv_slice(" + field + ", " + start + ")"); + return MvFunctionGenerator.mvSliceZipFunction(columns, allowUnmapped); } - // ========== IP FUNCTIONS ========== + // ========= IP ========= /** * Generates an cidr_match function. * May randomly use unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String cidrMatchFunction(List columns, boolean allowUnmapped) { - String ipField = fieldOrUnmapped(EsqlQueryGenerator.randomName(columns, Set.of("ip")), allowUnmapped); - if (ipField == null) { - return null; - } - String cidr = randomFrom("10.0.0.0/8", "192.168.0.0/16", "172.16.0.0/12", "0.0.0.0/0"); - return "cidr_match(" + ipField + ", \"" + cidr + "\")"; + return IpFunctionGenerator.cidrMatchFunction(columns, allowUnmapped); } /** * Generates an ip_prefix function. * May randomly use unmapped field names to test NULL data type handling. 
- * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String ipPrefixFunction(List columns, boolean allowUnmapped) { - String ipField = fieldOrUnmapped(EsqlQueryGenerator.randomName(columns, Set.of("ip")), allowUnmapped); - if (ipField == null) { - return null; - } - return "ip_prefix(" + ipField + ", " + randomIntBetween(8, 32) + ", " + randomIntBetween(48, 128) + ")"; + return IpFunctionGenerator.ipPrefixFunction(columns, allowUnmapped); } - // ========== BOOLEAN EXPRESSIONS ========== + // ========= Boolean expressions ========= /** * Generates an IS NULL / IS NOT NULL expression. * May randomly use unmapped field names - especially useful for testing IS NULL on unmapped fields. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String isNullExpression(List columns, boolean allowUnmapped) { - // Higher probability for unmapped fields in IS NULL expressions since they're always null - if (allowUnmapped && shouldAddUnmappedFieldWithProbabilityIncrease(3)) { - String unmapped = randomUnmappedFieldName(); - // Unmapped fields are always null, so IS NULL should be true, IS NOT NULL should be false - return unmapped + (randomBoolean() ? " IS NULL" : " IS NOT NULL"); - } - String field = EsqlQueryGenerator.randomName(columns); - if (field == null) { - return null; - } - return field + (randomBoolean() ? " IS NULL" : " IS NOT NULL"); + return BooleanExpressionGenerator.isNullExpression(columns, allowUnmapped); } /** * Generates an IN expression. * May randomly use unmapped field names to test NULL data type handling. 
- * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String inExpression(List columns, boolean allowUnmapped) { - String numericField = fieldOrUnmapped(EsqlQueryGenerator.randomNumericField(columns), allowUnmapped); - if (numericField != null && randomBoolean()) { - int val1 = randomIntBetween(0, 100); - int val2 = randomIntBetween(0, 100); - int val3 = randomIntBetween(0, 100); - return numericField + " IN (" + val1 + ", " + val2 + ", " + val3 + ")"; - } - String stringField = fieldOrUnmapped(EsqlQueryGenerator.randomStringField(columns), allowUnmapped); - if (stringField != null) { - return stringField + " IN (\"a\", \"b\", \"c\")"; - } - return null; + return BooleanExpressionGenerator.inExpression(columns, allowUnmapped); } /** * Generates a LIKE expression. * May randomly use unmapped field names to test NULL data type handling. - * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String likeExpression(List columns, boolean allowUnmapped) { - String stringField = fieldOrUnmapped(EsqlQueryGenerator.randomStringField(columns), allowUnmapped); - if (stringField == null) { - return null; - } - String pattern = randomFrom("*", "a*", "*b", "*test*", "???"); - return stringField + " LIKE \"" + pattern + "\""; + return BooleanExpressionGenerator.likeExpression(columns, allowUnmapped); } /** * Generates an RLIKE expression. * May randomly use unmapped field names to test NULL data type handling. 
- * - * @param columns the available columns - * @param allowUnmapped if true, may use unmapped field names */ public static String rlikeExpression(List columns, boolean allowUnmapped) { - String stringField = fieldOrUnmapped(EsqlQueryGenerator.randomStringField(columns), allowUnmapped); - if (stringField == null) { - return null; - } - String pattern = randomFrom(".*", "a.*", ".*b", ".*test.*", ".{3}"); - return stringField + " RLIKE \"" + pattern + "\""; + return BooleanExpressionGenerator.rlikeExpression(columns, allowUnmapped); } - // ========== TYPE-SAFE EXPRESSION GENERATORS ========== + // ========= Full-text ========= /** - * Generates a random expression that is guaranteed to return one of the given accepted types. - * This should be used when the expression will be passed as an argument to a function with - * specific type constraints (e.g. top(), greatest/least, etc.). - *

- * Prefers generating a function expression wrapping a compatible field, but falls back - * to a plain field reference if no function can be generated. - * - * @param columns the available columns - * @param acceptedTypes the set of types the calling function accepts (e.g. {"integer", "long", "double", "keyword", "date"}) - * @param allowUnmapped if true, may use unmapped field names - * @return an expression string whose output type is in acceptedTypes, or null if none can be generated + * Generates a random full-text search boolean expression. + * See {@link org.elasticsearch.xpack.esql.generator.function.FullTextFunctionGenerator} for placement and field-origin constraints. */ - public static String typeSafeExpression(List columns, Set acceptedTypes, boolean allowUnmapped) { - // First try to generate a function expression with a known compatible return type - if (randomIntBetween(0, 10) < 5) { - String funcExpr = typeSafeFunctionExpression(columns, acceptedTypes, allowUnmapped); - if (funcExpr != null) { - return funcExpr; - } - } - // Fall back to a direct field reference of a compatible type - return EsqlQueryGenerator.randomName(columns, acceptedTypes); + public static String fullTextFunction(List columns, List previousCommands) { + return FullTextFunctionGenerator.fullTextFunction(columns, previousCommands); } + // ========= Type-safe expressions ========= + /** - * Generates a function expression whose return type is guaranteed to be in the accepted types set. - * Each generator is mapped to its known return type category. - * - * @param columns the available columns - * @param acceptedTypes types the consuming function accepts - * @param allowUnmapped if true, may use unmapped field names - * @return a function expression string with a compatible return type, or null + * Generates an expression whose resulting type is in {@code acceptedTypes}. 
*/ - private static String typeSafeFunctionExpression(List columns, Set acceptedTypes, boolean allowUnmapped) { - boolean acceptsNumeric = acceptedTypes.contains("integer") || acceptedTypes.contains("long") || acceptedTypes.contains("double"); - boolean acceptsString = acceptedTypes.contains("keyword") || acceptedTypes.contains("text"); - boolean acceptsDate = acceptedTypes.contains("date") || acceptedTypes.contains("datetime"); - - // Build a list of candidate generators that produce compatible types - // Each entry is a generator function that returns an expression of the stated type - ArrayList> candidates = new ArrayList<>(); - - if (acceptsNumeric) { - candidates.add(() -> mathFunction(columns, allowUnmapped)); // returns numeric - candidates.add(() -> binaryMathFunction(columns, allowUnmapped)); // returns numeric - candidates.add(() -> stringToIntFunction(columns, allowUnmapped)); // returns integer - candidates.add(() -> clampFunction(columns, allowUnmapped)); // returns numeric - } - if (acceptsString) { - candidates.add(() -> stringFunction(columns, allowUnmapped)); // returns string - candidates.add(() -> concatFunction(columns, allowUnmapped)); // returns keyword - } - if (acceptsDate) { - // date_trunc returns date, now() returns date - String dateField = EsqlQueryGenerator.randomName(columns, Set.of("date", "datetime")); - if (dateField != null) { - String interval = randomFrom("1 day", "1 hour", "1 week", "1 month", "1 year"); - candidates.add(() -> "date_trunc(" + interval + ", " + dateField + ")"); - candidates.add(() -> "now()"); - } - } - - if (candidates.isEmpty()) { - return null; - } - - // Try a few candidates (some may return null if no suitable fields exist) - for (int attempt = 0; attempt < 3; attempt++) { - String result = randomFrom(candidates).get(); - if (result != null) { - return result; - } - } - return null; + public static String typeSafeExpression(List columns, Set acceptedTypes, boolean allowUnmapped) { + return 
TypeSafeExpressionGenerator.typeSafeExpression(columns, acceptedTypes, allowUnmapped); } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/CommandGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/CommandGenerator.java index 92d6f4c3df3e3..9f9c86d765d57 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/CommandGenerator.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/CommandGenerator.java @@ -10,7 +10,6 @@ import org.elasticsearch.xpack.esql.CsvTestsDataLoader; import org.elasticsearch.xpack.esql.generator.Column; import org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator; -import org.elasticsearch.xpack.esql.generator.FunctionGenerator; import org.elasticsearch.xpack.esql.generator.LookupIdx; import org.elasticsearch.xpack.esql.generator.QueryExecutor; @@ -18,6 +17,8 @@ import java.util.List; import java.util.Map; +import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.isUnmappedFieldsEnabled; + /** * Implement this if you want to your command to be tested by the random query generator. 
* Then add it to the right list in {@link EsqlQueryGenerator} @@ -131,7 +132,7 @@ static ValidationResult expectSameColumns( List columns ) { - if (FunctionGenerator.isUnmappedFieldsEnabled(previousCommands)) { + if (isUnmappedFieldsEnabled(previousCommands)) { return VALIDATION_OK; } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/pipe/EvalGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/pipe/EvalGenerator.java index 50bce7ee40ef5..e155b7f8bc88d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/pipe/EvalGenerator.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/pipe/EvalGenerator.java @@ -9,7 +9,6 @@ import org.elasticsearch.xpack.esql.generator.Column; import org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator; -import org.elasticsearch.xpack.esql.generator.FunctionGenerator; import org.elasticsearch.xpack.esql.generator.QueryExecutor; import org.elasticsearch.xpack.esql.generator.command.CommandGenerator; @@ -21,6 +20,7 @@ import static org.elasticsearch.test.ESTestCase.randomBoolean; import static org.elasticsearch.test.ESTestCase.randomIntBetween; import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.unquote; +import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.isUnmappedFieldsEnabled; public class EvalGenerator implements CommandGenerator { @@ -90,7 +90,7 @@ public ValidationResult validateOutput( List expectedColumns = (List) commandDescription.context().get(NEW_COLUMNS); List resultColNames = columns.stream().map(Column::name).toList(); List lastColumns = resultColNames.subList(resultColNames.size() - expectedColumns.size(), resultColNames.size()); - if (FunctionGenerator.isUnmappedFieldsEnabled(previousCommands) == false + if (isUnmappedFieldsEnabled(previousCommands) == false 
&& (columns.size() < expectedColumns.size() || lastColumns.equals(expectedColumns) == false)) { return new ValidationResult( false, diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/pipe/KeepGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/pipe/KeepGenerator.java index beb24a4e43ebe..28525f461d352 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/pipe/KeepGenerator.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/pipe/KeepGenerator.java @@ -9,7 +9,6 @@ import org.elasticsearch.xpack.esql.generator.Column; import org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator; -import org.elasticsearch.xpack.esql.generator.FunctionGenerator; import org.elasticsearch.xpack.esql.generator.QueryExecutor; import org.elasticsearch.xpack.esql.generator.command.CommandGenerator; @@ -22,6 +21,7 @@ import static org.elasticsearch.test.ESTestCase.randomBoolean; import static org.elasticsearch.test.ESTestCase.randomFrom; import static org.elasticsearch.test.ESTestCase.randomIntBetween; +import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.isUnmappedFieldsEnabled; public class KeepGenerator implements CommandGenerator { @@ -41,7 +41,7 @@ public CommandDescription generate( QuerySchema schema, QueryExecutor executor ) { - boolean unmappedFieldsEnabled = FunctionGenerator.isUnmappedFieldsEnabled(previousCommands); + boolean unmappedFieldsEnabled = isUnmappedFieldsEnabled(previousCommands); int n = randomIntBetween(1, previousOutput.size()); Set proj = new HashSet<>(); for (int i = 0; i < n; i++) { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/source/FromGenerator.java 
b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/source/FromGenerator.java index c9af764b855bc..536fbb3a06908 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/source/FromGenerator.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/command/source/FromGenerator.java @@ -12,6 +12,7 @@ import org.elasticsearch.xpack.esql.generator.QueryExecutor; import org.elasticsearch.xpack.esql.generator.command.CommandGenerator; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -28,8 +29,23 @@ public class FromGenerator implements CommandGenerator { */ public static final String UNMAPPED_FIELDS_ENABLED = "unmappedFieldsEnabled"; + /** + * Context key for the set of field names that come from the actual index mapping. + * Populated by the executor after the FROM command runs. + * Full-text functions that require FieldAttribute arguments use this to avoid computed columns. + */ + public static final String INDEX_FIELD_NAMES = "indexFieldNames"; + public static final String SET_UNMAPPED_FIELDS_PREFIX = "SET unmapped_fields=\"nullify\";"; + /** + * Returns {@code true} if the given command is a FROM source command. + * Used to gate full-text function generation which are only valid when the query originates from a FROM command (not TS or PROMQL). 
+ */ + public static boolean isFromSource(CommandDescription command) { + return command != null && "from".equals(command.commandName()); + } + @Override public CommandDescription generate( List previousCommands, @@ -53,7 +69,9 @@ public CommandDescription generate( result.append(pattern); } String query = result.toString(); - return new CommandDescription("from", this, query, Map.of(UNMAPPED_FIELDS_ENABLED, useUnmappedFields)); + Map context = new HashMap<>(); + context.put(UNMAPPED_FIELDS_ENABLED, useUnmappedFields); + return new CommandDescription("from", this, query, context); } @Override diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/BooleanExpressionGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/BooleanExpressionGenerator.java new file mode 100644 index 0000000000000..0af49c5bb65ef --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/BooleanExpressionGenerator.java @@ -0,0 +1,103 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.generator.function; + +import org.elasticsearch.xpack.esql.generator.Column; + +import java.util.List; + +import static org.elasticsearch.test.ESTestCase.randomBoolean; +import static org.elasticsearch.test.ESTestCase.randomFrom; +import static org.elasticsearch.test.ESTestCase.randomIntBetween; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomName; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomNumericField; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomStringField; +import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.shouldAddUnmappedFieldWithProbabilityIncrease; +import static org.elasticsearch.xpack.esql.generator.command.pipe.KeepGenerator.randomUnmappedFieldName; +import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.fieldOrUnmapped; + +/** + * Generates random boolean expressions used in WHERE clauses. + */ +public final class BooleanExpressionGenerator { + + private BooleanExpressionGenerator() {} + + /** + * Generates an IS NULL / IS NOT NULL expression. + * May randomly use unmapped field names - especially useful for testing IS NULL on unmapped fields. + * + * @param columns the available columns + * @param allowUnmapped if true, may use unmapped field names + */ + public static String isNullExpression(List columns, boolean allowUnmapped) { + if (allowUnmapped && shouldAddUnmappedFieldWithProbabilityIncrease(3)) { + String unmapped = randomUnmappedFieldName(); + return unmapped + (randomBoolean() ? " IS NULL" : " IS NOT NULL"); + } + String field = randomName(columns); + if (field == null) { + return null; + } + return field + (randomBoolean() ? " IS NULL" : " IS NOT NULL"); + } + + /** + * Generates an IN expression. + * May randomly use unmapped field names to test NULL data type handling. 
+ * + * @param columns the available columns + * @param allowUnmapped if true, may use unmapped field names + */ + public static String inExpression(List columns, boolean allowUnmapped) { + String numericField = fieldOrUnmapped(randomNumericField(columns), allowUnmapped); + if (numericField != null && randomBoolean()) { + int val1 = randomIntBetween(0, 100); + int val2 = randomIntBetween(0, 100); + int val3 = randomIntBetween(0, 100); + return numericField + " IN (" + val1 + ", " + val2 + ", " + val3 + ")"; + } + String stringField = fieldOrUnmapped(randomStringField(columns), allowUnmapped); + if (stringField != null) { + return stringField + " IN (\"a\", \"b\", \"c\")"; + } + return null; + } + + /** + * Generates a LIKE expression. + * May randomly use unmapped field names to test NULL data type handling. + * + * @param columns the available columns + * @param allowUnmapped if true, may use unmapped field names + */ + public static String likeExpression(List columns, boolean allowUnmapped) { + String stringField = fieldOrUnmapped(randomStringField(columns), allowUnmapped); + if (stringField == null) { + return null; + } + String pattern = randomFrom("*", "a*", "*b", "*test*", "???"); + return stringField + " LIKE \"" + pattern + "\""; + } + + /** + * Generates an RLIKE expression. + * May randomly use unmapped field names to test NULL data type handling. 
+ * + * @param columns the available columns + * @param allowUnmapped if true, may use unmapped field names + */ + public static String rlikeExpression(List columns, boolean allowUnmapped) { + String stringField = fieldOrUnmapped(randomStringField(columns), allowUnmapped); + if (stringField == null) { + return null; + } + String pattern = randomFrom(".*", "a.*", ".*b", ".*test.*", ".{3}"); + return stringField + " RLIKE \"" + pattern + "\""; + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/ConditionalFunctionGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/ConditionalFunctionGenerator.java new file mode 100644 index 0000000000000..e507edf9b099e --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/ConditionalFunctionGenerator.java @@ -0,0 +1,131 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.generator.function; + +import org.elasticsearch.xpack.esql.generator.Column; + +import java.util.List; +import java.util.stream.Collectors; + +import static org.elasticsearch.test.ESTestCase.randomBoolean; +import static org.elasticsearch.test.ESTestCase.randomFrom; +import static org.elasticsearch.test.ESTestCase.randomIntBetween; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.needsQuoting; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.quote; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomNumericField; +import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.COMMONLY_SUPPORTED_TYPES; +import static org.elasticsearch.xpack.esql.generator.FunctionGenerator.shouldAddUnmappedFieldWithProbabilityIncrease; +import static org.elasticsearch.xpack.esql.generator.command.pipe.KeepGenerator.randomUnmappedFieldName; +import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.fieldOrUnmapped; +import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.shouldAddUnmappedField; + +/** + * Generates random conditional function expressions. + */ +public final class ConditionalFunctionGenerator { + + private ConditionalFunctionGenerator() {} + + /** + * Generates a CASE expression. + * May randomly use unmapped field names to test NULL data type handling. + * + * @param columns the available columns + * @param allowUnmapped if true, may use unmapped field names + */ + public static String caseFunction(List columns, boolean allowUnmapped) { + String numericField = fieldOrUnmapped(randomNumericField(columns), allowUnmapped); + if (numericField == null) { + return null; + } + int threshold = randomIntBetween(0, 100); + return "case(" + numericField + " > " + threshold + ", \"high\", \"low\")"; + } + + /** + * Generates a COALESCE expression. 
+ * IMPORTANT: All arguments must be of the same type. COALESCE does NOT do type coercion. + * Only uses columns with commonly supported types to ensure the result can be consumed + * by other functions. + * <p>

+ * May randomly include unmapped field names to test NULL data type handling. + * + * @param columns the available columns + * @param allowUnmapped if true, may use unmapped field names + */ + public static String coalesceFunction(List columns, boolean allowUnmapped) { + var columnsByType = columns.stream() + .filter(c -> COMMONLY_SUPPORTED_TYPES.contains(c.type())) + .collect(Collectors.groupingBy(Column::type)); + + List sameTypeColumns = null; + for (var entry : columnsByType.entrySet()) { + if (entry.getValue().isEmpty() == false) { + sameTypeColumns = entry.getValue(); + break; + } + } + + if (sameTypeColumns == null || sameTypeColumns.isEmpty()) { + return null; + } + + String field1Raw = sameTypeColumns.get(randomIntBetween(0, sameTypeColumns.size() - 1)).name(); + String field1 = needsQuoting(field1Raw) ? quote(field1Raw) : field1Raw; + + if (allowUnmapped && shouldAddUnmappedFieldWithProbabilityIncrease(2)) { + String unmapped = randomUnmappedFieldName(); + return "coalesce(" + unmapped + ", " + field1 + ")"; + } + + if (sameTypeColumns.size() >= 2) { + String field2Raw = sameTypeColumns.get(randomIntBetween(0, sameTypeColumns.size() - 1)).name(); + String field2 = needsQuoting(field2Raw) ? quote(field2Raw) : field2Raw; + if (field1.equals(field2) == false) { + return "coalesce(" + field1 + ", " + field2 + ")"; + } + } + + return "coalesce(" + field1 + ", null)"; + } + + /** + * Generates a GREATEST or LEAST expression. + * IMPORTANT: All arguments must be of the same type. These functions do NOT do type coercion. + * May randomly include unmapped field names to test NULL data type handling. 
+ * + * @param columns the available columns + * @param allowUnmapped if true, may use unmapped field names + */ + public static String greatestLeastFunction(List columns, boolean allowUnmapped) { + String targetType = randomFrom("integer", "long", "double"); + List sameTypeFields = columns.stream() + .filter(c -> c.type().equals(targetType)) + .map(c -> needsQuoting(c.name()) ? quote(c.name()) : c.name()) + .collect(Collectors.toList()); + + if (allowUnmapped && shouldAddUnmappedField() && sameTypeFields.isEmpty() == false) { + sameTypeFields.add(randomUnmappedFieldName()); + } + + if (sameTypeFields.size() < 2) { + String numericField = randomNumericField(columns); + if (numericField != null) { + String func = randomBoolean() ? "greatest" : "least"; + int val1 = randomIntBetween(-100, 100); + int val2 = randomIntBetween(-100, 100); + return func + "(" + numericField + ", " + val1 + ", " + val2 + ")"; + } + return null; + } + + String func = randomBoolean() ? "greatest" : "least"; + int numArgs = Math.min(sameTypeFields.size(), randomIntBetween(2, 4)); + return func + "(" + String.join(", ", sameTypeFields.subList(0, numArgs)) + ")"; + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/DateFunctionGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/DateFunctionGenerator.java new file mode 100644 index 0000000000000..284e5c6dabf91 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/DateFunctionGenerator.java @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.generator.function; + +import org.elasticsearch.xpack.esql.generator.Column; + +import java.util.List; +import java.util.stream.Collectors; + +import static org.elasticsearch.test.ESTestCase.randomFrom; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.needsQuoting; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.quote; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomName; +import static org.elasticsearch.xpack.esql.generator.command.pipe.KeepGenerator.randomUnmappedFieldName; +import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.fieldOrUnmapped; +import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.shouldAddUnmappedField; + +/** + * Generates random date function expressions. + */ +public final class DateFunctionGenerator { + + private DateFunctionGenerator() {} + + /** + * Generates a date function. + * May randomly use unmapped field names to test NULL data type handling. 
+ * + * @param columns the available columns + * @param allowUnmapped if true, may use unmapped field names + */ + public static String dateFunction(List columns, boolean allowUnmapped) { + String dateField = fieldOrUnmapped(randomName(columns, java.util.Set.of("date", "datetime")), allowUnmapped); + if (dateField == null) { + return null; + } + String datePart = randomFrom( + "YEAR", + "MONTH_OF_YEAR", + "DAY_OF_MONTH", + "HOUR_OF_DAY", + "MINUTE_OF_HOUR", + "SECOND_OF_MINUTE", + "DAY_OF_WEEK", + "DAY_OF_YEAR", + "ALIGNED_WEEK_OF_YEAR" + ); + String interval = randomFrom("1 day", "1 hour", "1 week", "1 month", "1 year"); + return randomFrom( + "date_extract(\"" + datePart + "\", " + dateField + ")", + "date_trunc(" + interval + ", " + dateField + ")", + "date_format(\"yyyy-MM-dd\", " + dateField + ")", + "day_name(" + dateField + ")", + "month_name(" + dateField + ")", + "now()" + ); + } + + /** + * Generates a date_diff function. + * May randomly use unmapped field names to test NULL data type handling. + * + * @param columns the available columns + * @param allowUnmapped if true, may use unmapped field names + */ + public static String dateDiffFunction(List columns, boolean allowUnmapped) { + List dateFields = columns.stream() + .filter(c -> c.type().equals("date") || c.type().equals("datetime")) + .map(c -> needsQuoting(c.name()) ? 
quote(c.name()) : c.name()) + .collect(Collectors.toList()); + if (allowUnmapped && shouldAddUnmappedField()) { + dateFields.add(randomUnmappedFieldName()); + } + if (dateFields.size() < 2) { + return null; + } + String unit = randomFrom("second", "minute", "hour", "day", "week", "month", "year"); + return "date_diff(\"" + unit + "\", " + dateFields.get(0) + ", " + dateFields.get(1) + ")"; + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/FullTextFunctionGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/FullTextFunctionGenerator.java new file mode 100644 index 0000000000000..04bad2eecb127 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/FullTextFunctionGenerator.java @@ -0,0 +1,315 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.generator.function; + +import org.elasticsearch.xpack.esql.generator.Column; +import org.elasticsearch.xpack.esql.generator.command.CommandGenerator; +import org.elasticsearch.xpack.esql.generator.command.pipe.EvalGenerator; +import org.elasticsearch.xpack.esql.generator.command.source.FromGenerator; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import static org.elasticsearch.test.ESTestCase.randomBoolean; +import static org.elasticsearch.test.ESTestCase.randomFrom; +import static org.elasticsearch.test.ESTestCase.randomIntBetween; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.needsQuoting; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.quote; +import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomName; +import static org.elasticsearch.xpack.esql.generator.command.source.FromGenerator.isFromSource; + +/** + * Generates random full-text search expressions (match/match_phrase/qstr/kql/multi_match/:). 
+ */ +public final class FullTextFunctionGenerator { + + private FullTextFunctionGenerator() {} + + private static final Set QSTR_KQL_SAFE_COMMANDS = Set.of("from", "where", "sort"); + + private static boolean isFullTextAllowed(List previousCommands) { + if (previousCommands == null || previousCommands.isEmpty()) { + return false; + } + if (isFromSource(previousCommands.get(0)) == false) { + return false; + } + for (CommandGenerator.CommandDescription cmd : previousCommands) { + if ("limit".equals(cmd.commandName()) + || "stats".equals(cmd.commandName()) + || "inline stats".equals(cmd.commandName()) + || "change_point".equals(cmd.commandName()) + || "mv_expand".equals(cmd.commandName())) { + return false; + } + } + return true; + } + + private static boolean isQstrKqlAllowed(List previousCommands) { + if (isFullTextAllowed(previousCommands) == false) { + return false; + } + for (CommandGenerator.CommandDescription cmd : previousCommands) { + if (QSTR_KQL_SAFE_COMMANDS.contains(cmd.commandName()) == false) { + return false; + } + } + return true; + } + + private static final Pattern RENAME_PAIR = Pattern.compile("\\s*`?([^`]+?)`?\\s+[Aa][Ss]\\s+`?([^`]+?)`?\\s*"); + + @SuppressWarnings("unchecked") + private static List indexFieldColumns(List columns, List previousCommands) { + if (previousCommands == null || previousCommands.isEmpty()) { + return null; + } + Object stored = previousCommands.get(0).context().get(FromGenerator.INDEX_FIELD_NAMES); + if (stored instanceof Set == false) { + return null; + } + Set safeNames = new HashSet<>((Set) stored); + for (CommandGenerator.CommandDescription cmd : previousCommands) { + if ("eval".equals(cmd.commandName())) { + Object newCols = cmd.context().get(EvalGenerator.NEW_COLUMNS); + if (newCols instanceof List list) { + list.forEach(name -> safeNames.remove((String) name)); + } + } else if ("mv_expand".equals(cmd.commandName())) { + String expandedField = cmd.commandString().replaceFirst("(?i)^\\s*\\|\\s*mv_expand\\s+", 
"").trim(); + if (expandedField.startsWith("`") && expandedField.endsWith("`")) { + expandedField = expandedField.substring(1, expandedField.length() - 1); + } + safeNames.remove(expandedField); + } else if ("rename".equals(cmd.commandName())) { + String cmdStr = cmd.commandString().replaceFirst("(?i)^\\s*\\|\\s*rename\\s+", ""); + for (String pair : cmdStr.split(",")) { + Matcher m = RENAME_PAIR.matcher(pair); + if (m.matches()) { + String oldName = m.group(1); + String newName = m.group(2); + boolean wasSafe = safeNames.remove(oldName); + if (wasSafe) { + safeNames.add(newName); + } else { + safeNames.remove(newName); + } + } + } + } + } + return columns.stream().filter(c -> safeNames.contains(c.name())).toList(); + } + + private static final Set MATCH_FIELD_TYPES = Set.of( + "keyword", + "text", + "boolean", + "date", + "datetime", + "double", + "integer", + "ip", + "long", + "unsigned_long", + "version" + ); + private static final Set MATCH_PHRASE_FIELD_TYPES = Set.of("keyword", "text"); + + private static final String[] SAMPLE_QUERY_WORDS = { "test", "hello", "world", "data", "search", "quick", "brown", "fox" }; + + private static String randomQueryWord() { + return randomFrom(SAMPLE_QUERY_WORDS); + } + + private static String maybeOptions(String[][] optionPool) { + if (randomIntBetween(0, 4) > 0) { + return ""; + } + int count = Math.min(randomIntBetween(1, 2), optionPool.length); + Set usedIndices = new HashSet<>(); + StringBuilder sb = new StringBuilder(", {"); + int added = 0; + for (int i = 0; i < count; i++) { + int idx = randomIntBetween(0, optionPool.length - 1); + if (usedIndices.add(idx) == false) { + continue; + } + String[] entry = optionPool[idx]; + String name = entry[0]; + String value = entry[randomIntBetween(1, entry.length - 1)]; + if (added > 0) { + sb.append(", "); + } + sb.append("\"").append(name).append("\": ").append(value); + added++; + } + sb.append("}"); + return sb.toString(); + } + + private static final String[][] MATCH_OPTIONS = 
{ + { "operator", "\"AND\"", "\"OR\"" }, + { "fuzziness", "\"AUTO\"", "1", "2" }, + { "lenient", "true", "false" }, + { "boost", "1.0", "2.5" }, + { "zero_terms_query", "\"none\"", "\"all\"" }, }; + + private static final String[][] MATCH_PHRASE_OPTIONS = { + { "slop", "0", "1", "2" }, + { "boost", "1.0", "2.5" }, + { "zero_terms_query", "\"none\"", "\"all\"" }, }; + + private static final String[][] QSTR_OPTIONS = { + { "default_operator", "\"OR\"", "\"AND\"" }, + { "lenient", "true", "false" }, + { "fuzziness", "\"AUTO\"", "1" }, + { "boost", "1.0", "2.5" }, }; + + private static final String[][] KQL_OPTIONS = { { "case_insensitive", "true", "false" }, { "boost", "1.0", "2.5" }, }; + + private static final String[][] MULTI_MATCH_OPTIONS = { + { "operator", "\"AND\"", "\"OR\"" }, + { "lenient", "true", "false" }, + { "boost", "1.0", "2.5" }, + { "type", "\"best_fields\"", "\"most_fields\"", "\"phrase\"" }, }; + + /** + * Generates a {@code match(field, "query")} expression, or its operator variant {@code field : "query"}. + * {@code MatchOperator} extends {@code Match} — they share all constraints. + * The operator form does not support options. + */ + public static String matchFunction(List columns) { + String field = randomName(columns, MATCH_FIELD_TYPES); + if (field == null) { + return null; + } + String query = randomQueryWord(); + if (randomBoolean()) { + return field + " : \"" + query + "\""; + } + return "match(" + field + ", \"" + query + "\"" + maybeOptions(MATCH_OPTIONS) + ")"; + } + + /** + * Generates a {@code match_phrase(field, "query")} expression. + * field accepts: keyword, text only. + * query must be a string literal. 
+ */ + public static String matchPhraseFunction(List columns) { + String field = randomName(columns, MATCH_PHRASE_FIELD_TYPES); + if (field == null) { + return null; + } + String phrase = randomQueryWord() + " " + randomQueryWord(); + return "match_phrase(" + field + ", \"" + phrase + "\"" + maybeOptions(MATCH_PHRASE_OPTIONS) + ")"; + } + + /** + * Generates a {@code qstr("field:query")} expression using Lucene query string syntax. + * query is a string literal; no field argument. + */ + public static String qstrFunction(List columns) { + String field = randomName(columns, MATCH_PHRASE_FIELD_TYPES); + String query; + if (field != null && randomBoolean()) { + String rawName = field.startsWith("`") ? field.substring(1, field.length() - 1) : field; + query = rawName + ":" + randomQueryWord(); + } else { + query = randomQueryWord(); + } + return "qstr(\"" + query + "\"" + maybeOptions(QSTR_OPTIONS) + ")"; + } + + /** + * Generates a {@code kql("field:query")} expression using KQL syntax. + * query is a string literal; no field argument. + */ + public static String kqlFunction(List columns) { + String field = randomName(columns, MATCH_PHRASE_FIELD_TYPES); + String query; + if (field != null && randomBoolean()) { + String rawName = field.startsWith("`") ? field.substring(1, field.length() - 1) : field; + query = rawName + ": " + randomQueryWord(); + } else { + query = randomQueryWord(); + } + return "kql(\"" + query + "\"" + maybeOptions(KQL_OPTIONS) + ")"; + } + + /** + * Generates a {@code multi_match("query", field1, field2 [, ...])} expression. + * Fields accept the same types as match(). Query must be a string literal. + */ + public static String multiMatchFunction(List columns) { + List fields = columns.stream() + .filter(c -> MATCH_FIELD_TYPES.contains(c.type())) + .map(c -> needsQuoting(c.name()) ? 
quote(c.name()) : c.name()) + .collect(Collectors.toList()); + if (fields.size() < 2) { + return null; + } + int count = Math.min(fields.size(), randomIntBetween(2, 4)); + List selected = new ArrayList<>(); + for (int i = 0; i < count; i++) { + selected.add(fields.get(randomIntBetween(0, fields.size() - 1))); + } + return "multi_match(\"" + randomQueryWord() + "\", " + String.join(", ", selected) + maybeOptions(MULTI_MATCH_OPTIONS) + ")"; + } + + /** + * Generates a random full-text search boolean expression. Picks one of: match (including + * its {@code :} operator variant), match_phrase, qstr, kql, or multi_match. + *

+ * Respects two sets of constraints: + * <ul>
+ * <li>Placement: full-text functions are forbidden after LIMIT/STATS; + * QSTR and KQL additionally require all preceding commands to be FROM/WHERE/SORT.</li>
+ * <li>Field origin: match, match_phrase, and multi_match + * require fields from the actual index mapping (FieldAttribute), not columns + * created by EVAL, GROK, DISSECT, etc.</li>
+ * </ul>
+ * Returns {@code null} when no valid function can be generated. + */ + public static String fullTextFunction(List columns, List previousCommands) { + if (isFullTextAllowed(previousCommands) == false) { + return null; + } + + boolean qstrKqlAllowed = isQstrKqlAllowed(previousCommands); + + List indexColumns = indexFieldColumns(columns, previousCommands); + boolean fieldBasedAllowed = indexColumns != null && indexColumns.isEmpty() == false; + + if (fieldBasedAllowed && qstrKqlAllowed) { + return switch (randomIntBetween(0, 4)) { + case 0 -> matchFunction(indexColumns); + case 1 -> matchPhraseFunction(indexColumns); + case 2 -> qstrFunction(columns); + case 3 -> kqlFunction(columns); + default -> multiMatchFunction(indexColumns); + }; + } else if (fieldBasedAllowed) { + return switch (randomIntBetween(0, 2)) { + case 0 -> matchFunction(indexColumns); + case 1 -> matchPhraseFunction(indexColumns); + default -> multiMatchFunction(indexColumns); + }; + } else if (qstrKqlAllowed) { + return randomBoolean() ? qstrFunction(columns) : kqlFunction(columns); + } else { + return null; + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/FunctionGeneratorUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/FunctionGeneratorUtils.java new file mode 100644 index 0000000000000..e588ad2603f57 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/FunctionGeneratorUtils.java @@ -0,0 +1,89 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */
+
+package org.elasticsearch.xpack.esql.generator.function;
+
+import org.elasticsearch.xpack.esql.generator.Column;
+
+import java.util.List;
+import java.util.Set;
+
+import static org.elasticsearch.test.ESTestCase.randomIntBetween;
+import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomName;
+import static org.elasticsearch.xpack.esql.generator.command.pipe.KeepGenerator.randomUnmappedFieldName;
+
+/**
+ * Shared helpers used by the random function generators.
+ */
+final class FunctionGeneratorUtils {
+
+    /**
+     * Probability, in percent (0-100), of using an unmapped field name instead of a real field.
+     * This tests how functions handle NULL data type from unmapped fields.
+     */
+    private static final int UNMAPPED_FIELD_PROBABILITY = 10;
+
+    /**
+     * Types that are NOT accepted by most scalar functions. These are special metric/internal types
+     * that should be excluded when selecting fields for general-purpose function arguments.
+     */
+    private static final Set<String> SCALAR_UNSUPPORTED_TYPES = Set.of(
+        "counter_long",
+        "counter_double",
+        "counter_integer",
+        "aggregate_metric_double",
+        "dense_vector",
+        "tdigest",
+        "histogram",
+        "exponential_histogram",
+        "date_range"
+    );
+
+    private FunctionGeneratorUtils() {}
+
+    static boolean shouldAddUnmappedField() {
+        return shouldAddUnmappedFieldWithProbabilityIncrease(1);
+    }
+
+    static boolean shouldAddUnmappedFieldWithProbabilityIncrease(int probabilityIncrease) {
+        assert probabilityIncrease > 0 && probabilityIncrease < 10 : "Probability increase should be in interval [1, 9]";
+        return randomIntBetween(0, 99) < UNMAPPED_FIELD_PROBABILITY * probabilityIncrease; // bounds are inclusive: 100 outcomes
+    }
+
+    /**
+     * Returns an unmapped field name with some probability, otherwise (or when {@code allowUnmapped} is false) returns null.
+     */
+    static String maybeUnmappedField(boolean allowUnmapped) {
+        if (allowUnmapped == false) {
+            return null;
+        }
+        return shouldAddUnmappedField() ? randomUnmappedFieldName() : null;
+    }
+
+    /**
+     * Returns {@code realField}, or with some probability an unmapped field name instead; a null {@code realField} stays null.
+     */
+    static String fieldOrUnmapped(String realField, boolean allowUnmapped) {
+        if (realField == null) {
+            return null;
+        }
+        String unmapped = maybeUnmappedField(allowUnmapped);
+        return unmapped != null ? unmapped : realField;
+    }
+
+    /**
+     * Returns a field name suitable for use as a scalar function argument, or null if no column qualifies.
+     * Excludes types that are rejected by most scalar functions (counter types, aggregate_metric_double, etc.).
+     */
+    static String randomScalarField(List<Column> columns) {
+        List<Column> suitable = columns.stream().filter(c -> SCALAR_UNSUPPORTED_TYPES.contains(c.type()) == false).toList();
+        if (suitable.isEmpty()) {
+            return null;
+        }
+        return randomName(suitable);
+    }
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/IpFunctionGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/IpFunctionGenerator.java
new file mode 100644
index 0000000000000..f54d472967e10
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/IpFunctionGenerator.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.generator.function;
+
+import org.elasticsearch.xpack.esql.generator.Column;
+
+import java.util.List;
+import java.util.Set;
+
+import static org.elasticsearch.test.ESTestCase.randomFrom;
+import static org.elasticsearch.test.ESTestCase.randomIntBetween;
+import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomName;
+import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.fieldOrUnmapped;
+
+/**
+ * Generates random IP-related function expressions.
+ */
+public final class IpFunctionGenerator {
+
+    private IpFunctionGenerator() {}
+
+    /**
+     * Generates a cidr_match function, or returns null when no field name could be picked.
+     * May randomly use unmapped field names to test NULL data type handling.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String cidrMatchFunction(List<Column> columns, boolean allowUnmapped) {
+        String ipField = fieldOrUnmapped(randomName(columns, Set.of("ip")), allowUnmapped);
+        if (ipField == null) {
+            return null;
+        }
+        String cidr = randomFrom("10.0.0.0/8", "192.168.0.0/16", "172.16.0.0/12", "0.0.0.0/0");
+        return "cidr_match(" + ipField + ", \"" + cidr + "\")";
+    }
+
+    /**
+     * Generates an ip_prefix function, or returns null when no field name could be picked.
+     * May randomly use unmapped field names to test NULL data type handling.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String ipPrefixFunction(List<Column> columns, boolean allowUnmapped) {
+        String ipField = fieldOrUnmapped(randomName(columns, Set.of("ip")), allowUnmapped);
+        if (ipField == null) {
+            return null;
+        }
+        return "ip_prefix(" + ipField + ", " + randomIntBetween(8, 32) + ", " + randomIntBetween(48, 128) + ")";
+    }
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/MathFunctionGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/MathFunctionGenerator.java
new file mode 100644
index 0000000000000..4499b2f02318f
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/MathFunctionGenerator.java
@@ -0,0 +1,114 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.generator.function;
+
+import org.elasticsearch.xpack.esql.generator.Column;
+
+import java.util.List;
+
+import static org.elasticsearch.test.ESTestCase.randomFrom;
+import static org.elasticsearch.test.ESTestCase.randomIntBetween;
+import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomNumericField;
+import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.fieldOrUnmapped;
+import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.maybeUnmappedField;
+
+/**
+ * Generates random math function expressions.
+ */
+public final class MathFunctionGenerator {
+
+    private MathFunctionGenerator() {}
+
+    /**
+     * Generates a math function that takes a numeric argument and returns a numeric value.
+     * May randomly use unmapped field names to test NULL data type handling.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String mathFunction(List<Column> columns, boolean allowUnmapped) {
+        String numericField = fieldOrUnmapped(randomNumericField(columns), allowUnmapped);
+        if (numericField == null) {
+            numericField = maybeUnmappedField(allowUnmapped);
+            if (numericField == null) {
+                return null;
+            }
+        }
+        return randomFrom(
+            "abs(" + numericField + ")",
+            "ceil(" + numericField + ")",
+            "floor(" + numericField + ")",
+            "signum(" + numericField + ")",
+            "sqrt(abs(" + numericField + "))",
+            "cbrt(" + numericField + ")",
+            "exp(" + numericField + " % 10)",
+            "log10(abs(" + numericField + ") + 1)",
+            "round(" + numericField + ")",
+            "round(" + numericField + ", " + randomIntBetween(0, 5) + ")",
+            "sin(" + numericField + ")",
+            "cos(" + numericField + ")",
+            "tan(" + numericField + ")",
+            "asin(" + numericField + " % 1)",
+            "acos(" + numericField + " % 1)",
+            "atan(" + numericField + ")",
+            "sinh(" + numericField + " % 10)",
+            "cosh(" + numericField + " % 10)",
+            "tanh(" + numericField + ")",
+            "pi()",
+            "e()",
+            "tau()"
+        );
+    }
+
+    /**
+     * Generates a binary math function, or returns null when either operand could not be picked.
+     * May randomly use unmapped field names to test NULL data type handling.
+     * Note: greatest/least are handled separately to ensure type compatibility.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String binaryMathFunction(List<Column> columns, boolean allowUnmapped) {
+        String field1 = fieldOrUnmapped(randomNumericField(columns), allowUnmapped);
+        String field2 = fieldOrUnmapped(randomNumericField(columns), allowUnmapped);
+        if (field1 == null || field2 == null) {
+            return null;
+        }
+        return randomFrom(
+            "pow(" + field1 + ", 2)",
+            "pow(" + field1 + ", abs(" + field2 + ") % 5 + 1)",
+            "log(abs(" + field1 + ") + 1, abs(" + field2 + ") + 2)",
+            "atan2(" + field1 + ", " + field2 + ")",
+            "hypot(" + field1 + ", " + field2 + ")",
+            "copy_sign(" + field1 + ", " + field2 + ")",
+            "scalb(" + field1 + ", " + randomIntBetween(-5, 5) + ")"
+        );
+    }
+
+    /**
+     * Generates a clamp function (clamp, clamp_min, clamp_max).
+     * Note: clamp/clamp_min/clamp_max do NOT accept NULL for the field parameter,
+     * so unmapped fields (which resolve to NULL type) must not be used here.
+     *
+     * @param columns the available columns
+     * @return a clamp expression or {@code null} if no numeric field is available
+     */
+    public static String clampFunction(List<Column> columns) {
+        String numericField = randomNumericField(columns);
+        if (numericField == null) {
+            return null;
+        }
+        int min = randomIntBetween(-100, 50);
+        int max = min + randomIntBetween(1, 100);
+        return randomFrom(
+            "clamp(" + numericField + ", " + min + ", " + max + ")",
+            "clamp_min(" + numericField + ", " + min + ")",
+            "clamp_max(" + numericField + ", " + max + ")"
+        );
+    }
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/MvFunctionGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/MvFunctionGenerator.java
new file mode 100644
index 0000000000000..7730b7abecc28
--- /dev/null
+++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/MvFunctionGenerator.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.generator.function;
+
+import org.elasticsearch.xpack.esql.generator.Column;
+
+import java.util.List;
+
+import static org.elasticsearch.test.ESTestCase.randomBoolean;
+import static org.elasticsearch.test.ESTestCase.randomFrom;
+import static org.elasticsearch.test.ESTestCase.randomIntBetween;
+import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomNumericField;
+import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomStringField;
+import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.fieldOrUnmapped;
+import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.maybeUnmappedField;
+import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.randomScalarField;
+
+/**
+ * Generates random multivalue (mv_*) function expressions.
+ */
+public final class MvFunctionGenerator {
+
+    private MvFunctionGenerator() {}
+
+    /**
+     * Generates a multivalue function, or returns null when no field name could be picked.
+     * May randomly use unmapped field names to test NULL data type handling.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String mvFunction(List<Column> columns, boolean allowUnmapped) {
+        String anyField = fieldOrUnmapped(randomScalarField(columns), allowUnmapped);
+        if (anyField == null) {
+            anyField = maybeUnmappedField(allowUnmapped);
+            if (anyField == null) {
+                return null;
+            }
+        }
+
+        String genericMvFunc = randomFrom(
+            "mv_count(" + anyField + ")",
+            "mv_first(" + anyField + ")",
+            "mv_last(" + anyField + ")",
+            "mv_dedupe(" + anyField + ")"
+        );
+
+        String numericField = fieldOrUnmapped(randomNumericField(columns), allowUnmapped);
+        if (numericField != null && randomBoolean()) {
+            return randomFrom(
+                "mv_min(" + numericField + ")",
+                "mv_max(" + numericField + ")",
+                "mv_avg(" + numericField + ")",
+                "mv_sum(" + numericField + ")",
+                "mv_median(" + numericField + ")"
+            );
+        }
+
+        String stringField = fieldOrUnmapped(randomStringField(columns), allowUnmapped);
+        if (stringField != null && randomBoolean()) {
+            return randomFrom(
+                "mv_concat(" + stringField + ", \", \")",
+                "mv_sort(" + stringField + ")",
+                "mv_sort(" + stringField + ", \"desc\")"
+            );
+        }
+
+        return genericMvFunc;
+    }
+
+    /**
+     * Generates mv_slice functions (despite the method name, only mv_slice expressions are emitted); null if no field is available.
+     * May randomly use unmapped field names to test NULL data type handling.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String mvSliceZipFunction(List<Column> columns, boolean allowUnmapped) {
+        String field = fieldOrUnmapped(randomScalarField(columns), allowUnmapped);
+        if (field == null) {
+            return null;
+        }
+        int start = randomIntBetween(0, 3);
+        int end = start + randomIntBetween(1, 5);
+        return randomFrom("mv_slice(" + field + ", " + start + ", " + end + ")", "mv_slice(" + field + ", " + start + ")");
+    }
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/StringFunctionGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/StringFunctionGenerator.java
new file mode 100644
index 0000000000000..2577b7c441f07
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/StringFunctionGenerator.java
@@ -0,0 +1,148 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.generator.function;
+
+import org.elasticsearch.xpack.esql.generator.Column;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.elasticsearch.test.ESTestCase.randomAlphaOfLength;
+import static org.elasticsearch.test.ESTestCase.randomFrom;
+import static org.elasticsearch.test.ESTestCase.randomIntBetween;
+import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.needsQuoting;
+import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.quote;
+import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomStringField;
+import static org.elasticsearch.xpack.esql.generator.command.pipe.KeepGenerator.randomUnmappedFieldName;
+import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.fieldOrUnmapped;
+import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.shouldAddUnmappedField;
+
+/**
+ * Generates random string function expressions.
+ */
+public final class StringFunctionGenerator {
+
+    private StringFunctionGenerator() {}
+
+    /**
+     * Generates a string function that returns a string, or returns null when no field name could be picked.
+     * May randomly use unmapped field names to test NULL data type handling.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String stringFunction(List<Column> columns, boolean allowUnmapped) {
+        String stringField = fieldOrUnmapped(randomStringField(columns), allowUnmapped);
+        if (stringField == null) {
+            return null;
+        }
+        return randomFrom(
+            "to_lower(" + stringField + ")",
+            "to_upper(" + stringField + ")",
+            "trim(" + stringField + ")",
+            "ltrim(" + stringField + ")",
+            "rtrim(" + stringField + ")",
+            "reverse(" + stringField + ")",
+            "left(" + stringField + ", " + randomIntBetween(1, 10) + ")",
+            "right(" + stringField + ", " + randomIntBetween(1, 10) + ")",
+            "substring(" + stringField + ", " + randomIntBetween(0, 5) + ", " + randomIntBetween(1, 10) + ")",
+            "repeat(" + stringField + ", " + randomIntBetween(1, 3) + ")",
+            "space(" + randomIntBetween(0, 10) + ")",
+            "replace(" + stringField + ", \"a\", \"b\")",
+            "md5(" + stringField + ")",
+            "sha1(" + stringField + ")",
+            "sha256(" + stringField + ")",
+            "to_base64(" + stringField + ")",
+            "from_base64(to_base64(" + stringField + "))",
+            "url_encode(" + stringField + ")",
+            "url_decode(" + stringField + ")"
+        );
+    }
+
+    /**
+     * Generates a string function that returns an integer (length-like functions), or null if no field could be picked.
+     * May randomly use unmapped field names to test NULL data type handling.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String stringToIntFunction(List<Column> columns, boolean allowUnmapped) {
+        String stringField = fieldOrUnmapped(randomStringField(columns), allowUnmapped);
+        if (stringField == null) {
+            return null;
+        }
+        return randomFrom(
+            "length(" + stringField + ")",
+            "bit_length(" + stringField + ")",
+            "byte_length(" + stringField + ")",
+            "locate(" + stringField + ", \"a\")",
+            "locate(" + stringField + ", \"a\", " + randomIntBetween(0, 5) + ")"
+        );
+    }
+
+    /**
+     * Generates a string function that returns a boolean, or returns null when no field name could be picked.
+     * May randomly use unmapped field names to test NULL data type handling.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String stringToBoolFunction(List<Column> columns, boolean allowUnmapped) {
+        String stringField = fieldOrUnmapped(randomStringField(columns), allowUnmapped);
+        if (stringField == null) {
+            return null;
+        }
+        String searchStr = "\"" + randomAlphaOfLength(randomIntBetween(1, 3)) + "\"";
+        return randomFrom(
+            "starts_with(" + stringField + ", " + searchStr + ")",
+            "ends_with(" + stringField + ", " + searchStr + ")",
+            "contains(" + stringField + ", " + searchStr + ")"
+        );
+    }
+
+    /**
+     * Generates a concat function with multiple arguments, or returns null when fewer than two arguments are available.
+     * May randomly include unmapped field names to test NULL data type handling.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String concatFunction(List<Column> columns, boolean allowUnmapped) {
+        List<String> stringFields = columns.stream()
+            .filter(c -> c.type().equals("keyword") || c.type().equals("text"))
+            .map(c -> needsQuoting(c.name()) ? quote(c.name()) : c.name())
+            .limit(randomIntBetween(2, 4))
+            .collect(Collectors.toList());
+        if (stringFields.isEmpty()) {
+            return null;
+        }
+        if (allowUnmapped && shouldAddUnmappedField()) {
+            stringFields.add(randomUnmappedFieldName());
+        }
+        if (stringFields.size() < 2) {
+            return null;
+        }
+        return "concat(" + String.join(", ", stringFields) + ")";
+    }
+
+    /**
+     * Generates a split function, or returns null when no field name could be picked.
+     * May randomly use unmapped field names to test NULL data type handling.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String splitFunction(List<Column> columns, boolean allowUnmapped) {
+        String stringField = fieldOrUnmapped(randomStringField(columns), allowUnmapped);
+        if (stringField == null) {
+            return null;
+        }
+        String delimiter = randomFrom(",", " ", "-", "_", ":");
+        return "split(" + stringField + ", \"" + delimiter + "\")";
+    }
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/TypeConversionFunctionGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/TypeConversionFunctionGenerator.java
new file mode 100644
index 0000000000000..fa48a5206adca
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/TypeConversionFunctionGenerator.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.generator.function;
+
+import org.elasticsearch.xpack.esql.generator.Column;
+
+import java.util.List;
+
+import static org.elasticsearch.test.ESTestCase.randomBoolean;
+import static org.elasticsearch.test.ESTestCase.randomFrom;
+import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomNumericField;
+import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomStringField;
+import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.fieldOrUnmapped;
+import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.maybeUnmappedField;
+import static org.elasticsearch.xpack.esql.generator.function.FunctionGeneratorUtils.randomScalarField;
+
+/**
+ * Generates random type conversion function expressions.
+ */
+public final class TypeConversionFunctionGenerator {
+
+    private TypeConversionFunctionGenerator() {}
+
+    /**
+     * Generates a type conversion function, or returns null when no suitable field is available.
+     * May randomly use unmapped field names to test NULL data type handling.
+     *
+     * @param columns the available columns
+     * @param allowUnmapped if true, may use unmapped field names
+     */
+    public static String conversionFunction(List<Column> columns, boolean allowUnmapped) {
+        String unmapped = maybeUnmappedField(allowUnmapped);
+        if (unmapped != null) {
+            return randomFrom(
+                "to_string(" + unmapped + ")",
+                "to_integer(" + unmapped + ")",
+                "to_long(" + unmapped + ")",
+                "to_double(" + unmapped + ")"
+            );
+        }
+
+        String anyField = randomScalarField(columns);
+        if (anyField != null && randomBoolean()) {
+            return "to_string(" + fieldOrUnmapped(anyField, allowUnmapped) + ")";
+        }
+
+        String numericField = randomNumericField(columns);
+        if (numericField != null) {
+            return randomFrom(
+                "to_integer(" + numericField + ")",
+                "to_long(" + numericField + ")",
+                "to_double(" + numericField + ")",
+                "to_string(" + numericField + ")",
+                "to_degrees(" + numericField + ")",
+                "to_radians(" + numericField + ")"
+            );
+        }
+
+        String stringField = randomStringField(columns);
+        if (stringField != null) {
+            return randomFrom("to_string(" + stringField + ")", "to_lower(" + stringField + ")");
+        }
+
+        return null;
+    }
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/TypeSafeExpressionGenerator.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/TypeSafeExpressionGenerator.java
new file mode 100644
index 0000000000000..dd8b1b9ede997
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/generator/function/TypeSafeExpressionGenerator.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.generator.function;
+
+import org.elasticsearch.xpack.esql.generator.Column;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.test.ESTestCase.randomFrom;
+import static org.elasticsearch.test.ESTestCase.randomIntBetween;
+import static org.elasticsearch.xpack.esql.generator.EsqlQueryGenerator.randomName;
+import static org.elasticsearch.xpack.esql.generator.function.MathFunctionGenerator.binaryMathFunction;
+import static org.elasticsearch.xpack.esql.generator.function.MathFunctionGenerator.clampFunction;
+import static org.elasticsearch.xpack.esql.generator.function.MathFunctionGenerator.mathFunction;
+import static org.elasticsearch.xpack.esql.generator.function.StringFunctionGenerator.concatFunction;
+import static org.elasticsearch.xpack.esql.generator.function.StringFunctionGenerator.stringFunction;
+import static org.elasticsearch.xpack.esql.generator.function.StringFunctionGenerator.stringToIntFunction;
+
+/**
+ * Helpers to generate expressions with a known compatible return type.
+ */
+public final class TypeSafeExpressionGenerator {
+
+    private TypeSafeExpressionGenerator() {}
+
+    /**
+     * Generates a random expression that is guaranteed to return one of the given accepted types.
+     * This should be used when the expression will be passed as an argument to a function with
+     * specific type constraints (e.g. top(), greatest/least, etc.).
+     * <p>
+     * Prefers generating a function expression wrapping a compatible field, but falls back
+     * to a plain field reference if no function can be generated.
+     *
+     * @param columns the available columns
+     * @param acceptedTypes the set of types the calling function accepts (e.g. {"integer", "long", "double", "keyword", "date"})
+     * @param allowUnmapped if true, may use unmapped field names
+     * @return an expression string whose output type is in acceptedTypes, or null if none can be generated
+     */
+    public static String typeSafeExpression(List<Column> columns, Set<String> acceptedTypes, boolean allowUnmapped) {
+        if (randomIntBetween(0, 10) < 5) {
+            String funcExpr = typeSafeFunctionExpression(columns, acceptedTypes, allowUnmapped);
+            if (funcExpr != null) {
+                return funcExpr;
+            }
+        }
+        return randomName(columns, acceptedTypes);
+    }
+
+    private static String typeSafeFunctionExpression(List<Column> columns, Set<String> acceptedTypes, boolean allowUnmapped) {
+        boolean acceptsNumeric = acceptedTypes.contains("integer") || acceptedTypes.contains("long") || acceptedTypes.contains("double");
+        boolean acceptsString = acceptedTypes.contains("keyword") || acceptedTypes.contains("text");
+        boolean acceptsDate = acceptedTypes.contains("date") || acceptedTypes.contains("datetime");
+
+        ArrayList<Supplier<String>> candidates = new ArrayList<>();
+
+        if (acceptsNumeric) {
+            candidates.add(() -> mathFunction(columns, allowUnmapped));
+            candidates.add(() -> binaryMathFunction(columns, allowUnmapped));
+            candidates.add(() -> stringToIntFunction(columns, allowUnmapped));
+            candidates.add(() -> clampFunction(columns));
+        }
+        if (acceptsString) {
+            candidates.add(() -> stringFunction(columns, allowUnmapped));
+            candidates.add(() -> concatFunction(columns, allowUnmapped));
+        }
+        if (acceptsDate) {
+            String dateField = randomName(columns, Set.of("date", "datetime"));
+            if (dateField != null) {
+                String interval = randomFrom("1 day", "1 hour", "1 week", "1 month", "1 year");
+                candidates.add(() -> "date_trunc(" + interval + ", " + dateField + ")");
+                candidates.add(() -> "now()");
+            }
+        }
+
+        if (candidates.isEmpty()) {
+            return null;
+        }
+
+        for (int attempt = 0; attempt < 3; attempt++) { // a candidate may yield null; retry a few times before giving up
+            String result = randomFrom(candidates).get();
+            if (result != null) {
+                return result;
+            }
+        }
+        return null;
+    }
+}